summaryrefslogtreecommitdiff
path: root/ipc/mqueue.c
blob: 5becca9be867ce029cb0f98667fac974303d188b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
/*
 * POSIX message queues filesystem for Linux.
 *
 * Copyright (C) 2003,2004  Krzysztof Benedyczak    (golbi@mat.uni.torun.pl)
 *                          Michal Wronski          (michal.wronski@gmail.com)
 *
 * Spinlocks:               Mohamed Abbas           (abbas.mohamed@intel.com)
 * Lockless receive & send, fd based notify:
 *			    Manfred Spraul	    (manfred@colorfullife.com)
 *
 * Audit:                   George Wilson           (ltcgcw@us.ibm.com)
 *
 * This file is released under the GPL.
 */

#include <linux/capability.h>
#include <linux/init.h>
#include <linux/pagemap.h>
#include <linux/file.h>
#include <linux/mount.h>
#include <linux/fs_context.h>
#include <linux/namei.h>
#include <linux/sysctl.h>
#include <linux/poll.h>
#include <linux/mqueue.h>
#include <linux/msg.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
#include <linux/netlink.h>
#include <linux/syscalls.h>
#include <linux/audit.h>
#include <linux/signal.h>
#include <linux/mutex.h>
#include <linux/nsproxy.h>
#include <linux/pid.h>
#include <linux/ipc_namespace.h>
#include <linux/user_namespace.h>
#include <linux/slab.h>
#include <linux/sched/wake_q.h>
#include <linux/sched/signal.h>
#include <linux/sched/user.h>

#include <net/sock.h>
#include "util.h"

struct mqueue_fs_context {
	struct ipc_namespace	*ipc_ns;
};

#define MQUEUE_MAGIC	0x19800202
#define DIRENT_SIZE	20
#define FILENT_SIZE	80

#define SEND		0
#define RECV		1

#define STATE_NONE	0
#define STATE_READY	1

struct posix_msg_tree_node {
	struct rb_node		rb_node;
	struct list_head	msg_list;
	int			priority;
};

/*
 * Locking:
 *
 * Accesses to a message queue are synchronized by acquiring info->lock.
 *
 * There are two notable exceptions:
 * - The actual wakeup of a sleeping task is performed using the wake_q
 *   framework. info->lock is already released when wake_up_q is called.
 * - The exit codepaths after sleeping check ext_wait_queue->state without
 *   any locks. If it is STATE_READY, then the syscall is completed without
 *   acquiring info->lock.
 *
 * MQ_BARRIER:
 * To achieve proper release/acquire memory barrier pairing, the state is set to
 * STATE_READY with smp_store_release(), and it is read with READ_ONCE followed
 * by smp_acquire__after_ctrl_dep(). In addition, wake_q_add_safe() is used.
 *
 * This prevents the following races:
 *
 * 1) With the simple wake_q_add(), the task could be gone already before
 *    the increase of the reference happens
 * Thread A
 *				Thread B
 * WRITE_ONCE(wait.state, STATE_NONE);
 * schedule_hrtimeout()
 *				wake_q_add(A)
 *				if (cmpxchg()) // success
 *				   ->state = STATE_READY (reordered)
 * <timeout returns>
 * if (wait.state == STATE_READY) return;
 * sysret to user space
 * sys_exit()
 *				get_task_struct() // UaF
 *
 * Solution: Use wake_q_add_safe() and perform the get_task_struct() before
 * the smp_store_release() that does ->state = STATE_READY.
 *
 * 2) Without proper _release/_acquire barriers, the woken up task
 *    could read stale data
 *
 * Thread A
 *				Thread B
 * do_mq_timedreceive
 * WRITE_ONCE(wait.state, STATE_NONE);
 * schedule_hrtimeout()
 *				state = STATE_READY;
 * <timeout returns>
 * if (wait.state == STATE_READY) return;
 * msg_ptr = wait.msg;		// Access to stale data!
 *				receiver->msg = message; (reordered)
 *
 * Solution: use _release and _acquire barriers.
 *
 * 3) There is intentionally no barrier when setting current->state
 *    to TASK_INTERRUPTIBLE: spin_unlock(&info->lock) provides the
 *    release memory barrier, and the wakeup is triggered when holding
 *    info->lock, i.e. spin_lock(&info->lock) provided a pairing
 *    acquire memory barrier.
 */

struct ext_wait_queue {		/* queue of sleeping tasks */
	struct task_struct *task;
	struct list_head list;
	struct msg_msg *msg;	/* ptr of loaded message */
	int state;		/* one of STATE_* values */
};

struct mqueue_inode_info {
	spinlock_t lock;
	struct inode vfs_inode;
	wait_queue_head_t wait_q;

	struct rb_root msg_tree;
	struct rb_node *msg_tree_rightmost;
	struct posix_msg_tree_node *node_cache;
	struct mq_attr attr;

	struct sigevent notify;
	struct pid *notify_owner;
	u32 notify_self_exec_id;
	struct user_namespace *notify_user_ns;
	struct ucounts *ucounts;	/* user who created, for accounting */
	struct sock *notify_sock;
	struct sk_buff *notify_cookie;

	/* for tasks waiting for free space and messages, respectively */
	struct ext_wait_queue e_wait_q[2];

	unsigned long qsize; /* size of queue in memory (sum of all msgs) */
};

static struct file_system_type mqueue_fs_type;
static const struct inode_operations mqueue_dir_inode_operations;
static const struct file_operations mqueue_file_operations;
static const struct super_operations mqueue_super_ops;
static const struct fs_context_operations mqueue_fs_context_ops;
static void remove_notification(struct mqueue_inode_info *info);

static struct kmem_cache *mqueue_inode_cachep;

static struct ctl_table_header *mq_sysctl_table;

static inline struct mqueue_inode_info *MQUEUE_I(struct inode *inode)
{
	return container_of(inode, struct mqueue_inode_info, vfs_inode);
}

/*
 * This routine should be called with the mq_lock held.
 */
static inline struct ipc_namespace *__get_ns_from_inode(struct inode *inode)
{
	return get_ipc_ns(inode->i_sb->s_fs_info);
}

static struct ipc_namespace *get_ns_from_inode(struct inode *inode)
{
	struct ipc_namespace *ns;

	spin_lock(&mq_lock);
	ns = __get_ns_from_inode(inode);
	spin_unlock(&mq_lock);
	return ns;
}

/* Auxiliary functions to manipulate messages' list */
static int msg_insert(struct msg_msg *msg, struct mqueue_inode_info *info)
{
	struct rb_node **p, *parent = NULL;
	struct posix_msg_tree_node *leaf;
	bool rightmost = true;

	p = &info->msg_tree.rb_node;
	while (*p) {
		parent = *p;
		leaf = rb_entry(parent, struct posix_msg_tree_node, rb_node);

		if (likely(leaf->priority == msg->m_type))
			goto insert_msg;
		else if (msg->m_type < leaf->priority) {
			p = &(*p)->rb_left;
			rightmost = false;
		} else
			p = &(*p)->rb_right;
	}
	if (info->node_cache) {
		leaf = info->node_cache;
		info->node_cache = NULL;
	} else {
		leaf = kmalloc(sizeof(*leaf), GFP_ATOMIC);
		if (!leaf)
			return -ENOMEM;
		INIT_LIST_HEAD(&leaf->msg_list);
	}
	leaf->priority = msg->m_type;

	if (rightmost)
		info->msg_tree_rightmost = &leaf->rb_node;

	rb_link_node(&leaf->rb_node, parent, p);
	rb_insert_color(&leaf->rb_node, &info->msg_tree);
insert_msg:
	info->attr.mq_curmsgs++;
	info->qsize += msg->m_ts;
	list_add_tail(&msg->m_list, &leaf->msg_list);
	return 0;
}

static inline void msg_tree_erase(struct posix_msg_tree_node *leaf,
				  struct mqueue_inode_info *info)
{
	struct rb_node *node = &leaf->rb_node;

	if (info->msg_tree_rightmost == node)
		info->msg_tree_rightmost = rb_prev(node);

	rb_erase(node, &info->msg_tree);
	if (info->node_cache)
		kfree(leaf);
	else
		info->node_cache = leaf;
}

static inline struct msg_msg *msg_get(struct mqueue_inode_info *info)
{
	struct rb_node *parent = NULL;
	struct posix_msg_tree_node *leaf;
	struct msg_msg *msg;

try_again:
	/*
	 * During insert, low priorities go to the left and high to the
	 * right.  On receive, we want the highest priorities first, so
	 * walk all the way to the right.
	 */
	parent = info->msg_tree_rightmost;
	if (!parent) {
		if (info->attr.mq_curmsgs) {
			pr_warn_once("Inconsistency in POSIX message queue, "
				     "no tree element, but supposedly messages "
				     "should exist!\n");
			info->attr.mq_curmsgs = 0;
		}
		return NULL;
	}
	leaf = rb_entry(parent, struct posix_msg_tree_node, rb_node);
	if (unlikely(list_empty(&leaf->msg_list))) {
		pr_warn_once("Inconsistency in POSIX message queue, "
			     "empty leaf node but we haven't implemented "
			     "lazy leaf delete!\n");
		msg_tree_erase(leaf, info);
		goto try_again;
	} else {
		msg = list_first_entry(&leaf->msg_list,
				       struct msg_msg, m_list);
		list_del(&msg->m_list);
		if (list_empty(&leaf->msg_list)) {
			msg_tree_erase(leaf, info);
		}
	}
	info->attr.mq_curmsgs--;
	info->qsize -= msg->m_ts;
	return msg;
}

static struct inode *mqueue_get_inode(struct super_block *sb,
		struct ipc_namespace *ipc_ns, umode_t mode,
		struct mq_attr *attr)
{
	struct inode *inode;
	int ret = -ENOMEM;

	inode = new_inode(sb);
	if (!inode)
		goto err;

	inode->i_ino = get_next_ino();
	inode->i_mode = mode;
	inode->i_uid = current_fsuid();
	inode->i_gid = current_fsgid();
	inode->i_mtime = inode->i_ctime = inode->i_atime = current_time(inode);

	if (S_ISREG(mode)) {
		struct mqueue_inode_info *info;
		unsigned long mq_bytes, mq_treesize;

		inode->i_fop = &mqueue_file_operations;
		inode->i_size = FILENT_SIZE;
		/* mqueue specific info */
		info = MQUEUE_I(inode);
		spin_lock_init(&info->lock);
		init_waitqueue_head(&info->wait_q);
		INIT_LIST_HEAD(&info->e_wait_q[0].list);
		INIT_LIST_HEAD(&info->e_wait_q[1].list);
		info->notify_owner = NULL;
		info->notify_user_ns = NULL;
		info->qsize = 0;
		info->ucounts = NULL;	/* set when all is ok */
		info->msg_tree = RB_ROOT;
		info->msg_tree_rightmost = NULL;
		info->node_cache = NULL;
		memset(&info->attr, 0, sizeof(info->attr));
		info->attr.mq_maxmsg = min(ipc_ns->mq_msg_max,
					   ipc_ns->mq_msg_default);
		info->attr.mq_msgsize = min(ipc_ns->mq_msgsize_max,
					    ipc_ns->mq_msgsize_default);
		if (attr) {
			info->attr.mq_maxmsg = attr->mq_maxmsg;
			info->attr.mq_msgsize = attr->mq_msgsize;
		}
		/*
		 * We used to allocate a static array of pointers and account
		 * the size of that array as well as one msg_msg struct per
		 * possible message into the queue size. That's no longer
		 * accurate as the queue is now an rbtree and will grow and
		 * shrink depending on usage patterns.  We can, however, still
		 * account one msg_msg struct per message, but the nodes are
		 * allocated depending on priority usage, and most programs
		 * only use one, or a handful, of priorities.  However, since
		 * this is pinned memory, we need to assume worst case, so
		 * that means the min(mq_maxmsg, max_priorities) * struct
		 * posix_msg_tree_node.
		 */

		ret = -EINVAL;
		if (info->attr.mq_maxmsg <= 0 || info->attr.mq_msgsize <= 0)
			goto out_inode;
		if (capable(CAP_SYS_RESOURCE)) {
			if (info->attr.mq_maxmsg > HARD_MSGMAX ||
			    info->attr.mq_msgsize > HARD_MSGSIZEMAX)
				goto out_inode;
		} else {
			if (info->attr.mq_maxmsg > ipc_ns->mq_msg_max ||
					info->attr.mq_msgsize > ipc_ns->mq_msgsize_max)
				goto out_inode;
		}
		ret = -EOVERFLOW;
		/* check for overflow */
		if (info->attr.mq_msgsize > ULONG_MAX/info->attr.mq_maxmsg)
			goto out_inode;
		mq_treesize = info->attr.mq_maxmsg * sizeof(struct msg_msg) +
			min_t(unsigned int, info->attr.mq_maxmsg, MQ_PRIO_MAX) *
			sizeof(struct posix_msg_tree_node);
		mq_bytes = info->attr.mq_maxmsg * info->attr.mq_msgsize;
		if (mq_bytes + mq_treesize < mq_bytes)
			goto out_inode;
		mq_bytes += mq_treesize;
		info->ucounts = get_ucounts(current_ucounts());
		if (info->ucounts) {
			long msgqueue;

			spin_lock(&mq_lock);
			msgqueue = inc_rlimit_ucounts(info->ucounts, UCOUNT_RLIMIT_MSGQUEUE, mq_bytes);
			if (msgqueue == LONG_MAX || msgqueue > rlimit(RLIMIT_MSGQUEUE)) {
				dec_rlimit_ucounts(info->ucounts, UCOUNT_RLIMIT_MSGQUEUE, mq_bytes);
				spin_unlock(&mq_lock);
				put_ucounts(info->ucounts);
				info->ucounts = NULL;
				/* mqueue_evict_inode() releases info->messages */
				ret = -EMFILE;
				goto out_inode;
			}
			spin_unlock(&mq_lock);
		}
	} else if (S_ISDIR(mode)) {
		inc_nlink(inode);
		/* Some things misbehave if size == 0 on a directory */
		inode->i_size = 2 * DIRENT_SIZE;
		inode->i_op = &mqueue_dir_inode_operations;
		inode->i_fop = &simple_dir_operations;
	}

	return inode;
out_inode:
	iput(inode);
err:
	return ERR_PTR(ret);
}

static int mqueue_fill_super(struct super_block *sb, struct fs_context *fc)
{
	struct inode *inode;
	struct ipc_namespace *ns = sb->s_fs_info;

	sb->s_iflags |= SB_I_NOEXEC | SB_I_NODEV;
	sb->s_blocksize = PAGE_SIZE;
	sb->s_blocksize_bits = PAGE_SHIFT;
	sb->s_magic = MQUEUE_MAGIC;
	sb->s_op = &mqueue_super_ops;

	inode = mqueue_get_inode(sb, ns, S_IFDIR | S_ISVTX | S_IRWXUGO, NULL);
	if (IS_ERR(inode))
		return PTR_ERR(inode);

	sb->s_root = d_make_root(inode);
	if (!sb->s_root)
		return -ENOMEM;
	return 0;
}

static int mqueue_get_tree(struct fs_context *fc)
{
	struct mqueue_fs_context *ctx = fc->fs_private;

	return get_tree_keyed(fc, mqueue_fill_super, ctx->ipc_ns);
}

static void mqueue_fs_context_free(struct fs_context *fc)
{
	struct mqueue_fs_context *ctx = fc->fs_private;

	put_ipc_ns(ctx->ipc_ns);
	kfree(ctx);
}

static int mqueue_init_fs_context(struct fs_context *fc)
{
	struct mqueue_fs_context *ctx;

	ctx = kzalloc(sizeof(struct mqueue_fs_context), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;

	ctx->ipc_ns = get_ipc_ns(current->nsproxy->ipc_ns);
	put_user_ns(fc->user_ns);
	fc->user_ns = get_user_ns(ctx->ipc_ns->user_ns);
	fc->fs_private = ctx;
	fc->ops = &mqueue_fs_context_ops;
	return 0;
}

static struct vfsmount *mq_create_mount(struct ipc_namespace *ns)
{
	struct mqueue_fs_context *ctx;
	struct fs_context *fc;
	struct vfsmount *mnt;

	fc = fs_context_for_mount(&mqueue_fs_type, SB_KERNMOUNT);
	if (IS_ERR(fc))
		return ERR_CAST(fc);

	ctx = fc->fs_private;
	put_ipc_ns(ctx->ipc_ns);
	ctx->ipc_ns = get_ipc_ns(ns);
	put_user_ns(fc->user_ns);
	fc->user_ns = get_user_ns(ctx->ipc_ns->user_ns);

	mnt = fc_mount(fc);
	put_fs_context(fc);
	return mnt;
}

static void init_once(void *foo)
{
	struct mqueue_inode_info *p = (struct mqueue_inode_info *) foo;

	inode_init_once(&p->vfs_inode);
}

static struct inode *mqueue_alloc_inode(struct super_block *sb)
{
	struct mqueue_inode_info *ei;

	ei = kmem_cache_alloc(mqueue_inode_cachep, GFP_KERNEL);
	if (!ei)
		return NULL;
	return &ei->vfs_inode;
}

static void mqueue_free_inode(struct inode *inode)
{
	kmem_cache_free(mqueue_inode_cachep, MQUEUE_I(inode));
}

static void mqueue_evict_inode(struct inode *inode)
{
	struct mqueue_inode_info *info;
	struct ipc_namespace *ipc_ns;
	struct msg_msg *msg, *nmsg;
	LIST_HEAD(tmp_msg);

	clear_inode(inode);

	if (S_ISDIR(inode->i_mode))
		return;

	ipc_ns = get_ns_from_inode(inode);
	info = MQUEUE_I(inode);
	spin_lock(&info->lock);
	while ((msg = msg_get(info)) != NULL)
		list_add_tail(&msg->m_list, &tmp_msg);
	kfree(info->node_cache);
	spin_unlock(&info->lock);

	list_for_each_entry_safe(msg, nmsg, &tmp_msg, m_list) {
		list_del(&msg->m_list);
		free_msg(msg);
	}

	if (info->ucounts) {
		unsigned long mq_bytes, mq_treesize;

		/* Total amount of bytes accounted for the mqueue */
		mq_treesize = info->attr.mq_maxmsg * sizeof(struct msg_msg) +
			min_t(unsigned int, info->attr.mq_maxmsg, MQ_PRIO_MAX) *
			sizeof(struct posix_msg_tree_node);

		mq_bytes = mq_treesize + (info->attr.mq_maxmsg *
					  info->attr.mq_msgsize);

		spin_lock(&mq_lock);
		dec_rlimit_ucounts(info->ucounts, UCOUNT_RLIMIT_MSGQUEUE, mq_bytes);
		/*
		 * get_ns_from_inode() ensures that the
		 * (ipc_ns = sb->s_fs_info) is either a valid ipc_ns
		 * to which we now hold a reference, or it is NULL.
		 * We can't put it here under mq_lock, though.
		 */
		if (ipc_ns)
			ipc_ns->mq_queues_count--;
		spin_unlock(&mq_lock);
		put_ucounts(info->ucounts);
		info->ucounts = NULL;
	}
	if (ipc_ns)
		put_ipc_ns(ipc_ns);
}

static int mqueue_create_attr(struct dentry *dentry, umode_t mode, void *arg)
{
	struct inode *dir = dentry->d_parent->d_inode;
	struct inode *inode;
	struct mq_attr *attr = arg;
	int error;
	struct ipc_namespace *ipc_ns;

	spin_lock(&mq_lock);
	ipc_ns = __get_ns_from_inode(dir);
	if (!ipc_ns) {
		error = -EACCES;
		goto out_unlock;
	}

	if (ipc_ns->mq_queues_count >= ipc_ns->mq_queues_max &&
	    !capable(CAP_SYS_RESOURCE)) {
		error = -ENOSPC;
		goto out_unlock;
	}
	ipc_ns->mq_queues_count++;
	spin_unlock(&mq_lock);

	inode = mqueue_get_inode(dir->i_sb, ipc_ns, mode, attr);
	if (IS_ERR(inode)) {
		error = PTR_ERR(inode);
		spin_lock(&mq_lock);
		ipc_ns->mq_queues_count--;
		goto out_unlock;
	}

	put_ipc_ns(ipc_ns);
	dir->i_size += DIRENT_SIZE;
	dir->i_ctime = dir->i_mtime = dir->i_atime = current_time(dir);

	d_instantiate(dentry, inode);
	dget(dentry);
	return 0;
out_unlock:
	spin_unlock(&mq_lock);
	if (ipc_ns)
		put_ipc_ns(ipc_ns);
	return error;
}

static int mqueue_create(struct user_namespace *mnt_userns, struct inode *dir,
			 struct dentry *dentry, umode_t mode, bool excl)
{
	return mqueue_create_attr(dentry, mode, NULL);
}

static int mqueue_unlink(struct inode *dir, struct dentry *dentry)
{
	struct inode *inode = d_inode(dentry);

	dir->i_ctime = dir->i_mtime = dir->i_atime = current_time(dir);
	dir->i_size -= DIRENT_SIZE;
	drop_nlink(inode);
	dput(dentry);
	return 0;
}

/*
*	This is routine for system read from queue file.
*	To avoid mess with doing here some sort of mq_receive we allow
*	to read only queue size & notification info (the only values
*	that are interesting from user point of view and aren't accessible
*	through std routines)
*/
static ssize_t mqueue_read_file(struct file *filp, char __user *u_data,
				size_t count, loff_t *off)
{
	struct mqueue_inode_info *info = MQUEUE_I(file_inode(filp));
	char buffer[FILENT_SIZE];
	ssize_t ret;

	spin_lock(&info->lock);
	snprintf(buffer, sizeof(buffer),
			"QSIZE:%-10lu NOTIFY:%-5d SIGNO:%-5d NOTIFY_PID:%-6d\n",
			info->qsize,
			info->notify_owner ? info->notify.sigev_notify : 0,
			(info->notify_owner &&
			 info->notify.sigev_notify == SIGEV_SIGNAL) ?
				info->notify.sigev_signo : 0,
			pid_vnr(info->notify_owner));
	spin_unlock(&info->lock);
	buffer[sizeof(buffer)-1] = '\0';

	ret = simple_read_from_buffer(u_data, count, off, buffer,
				strlen(buffer));
	if (ret <= 0)
		return ret;

	file_inode(filp)->i_atime = file_inode(filp)->i_ctime = current_time(file_inode(filp));
	return ret;
}

static int mqueue_flush_file(struct file *filp, fl_owner_t id)
{
	struct mqueue_inode_info *info = MQUEUE_I(file_inode(filp));

	spin_lock(&info->lock);
	if (task_tgid(current) == info->notify_owner)
		remove_notification(info);

	spin_unlock(&info->lock);
	return 0;
}

static __poll_t mqueue_poll_file(struct file *filp, struct poll_table_struct *poll_tab)
{
	struct mqueue_inode_info *info = MQUEUE_I(file_inode(filp));
	__poll_t retval = 0;

	poll_wait(filp, &info->wait_q, poll_tab);

	spin_lock(&info->lock);
	if (info->attr.mq_curmsgs)
		retval = EPOLLIN | EPOLLRDNORM;

	if (info->attr.mq_curmsgs < info->attr.mq_maxmsg)
		retval |= EPOLLOUT | EPOLLWRNORM;
	spin_unlock(&info->lock);

	return retval;
}

/* Adds current to info->e_wait_q[sr] before element with smaller prio */
static void wq_add(struct mqueue_inode_info *info, int sr,
			struct ext_wait_queue *ewp)
{
	struct ext_wait_queue *walk;

	list_for_each_entry(walk, &info->e_wait_q[sr].list, list) {
		if (walk->task->prio <= current->prio) {
			list_add_tail(&ewp->list, &walk->list);
			return;
		}
	}
	list_add_tail(&ewp->list, &info->e_wait_q[sr].list);
}

/*
 * Puts current task to sleep. Caller must hold queue lock. After return
 * lock isn't held.
 * sr: SEND or RECV
 */
static int wq_sleep(struct mqueue_inode_info *info, int sr,
		    ktime_t *timeout, struct ext_wait_queue *ewp)
	__releases(&info->lock)
{
	int retval;
	signed long time;

	wq_add(info, sr, ewp);

	for (;;) {
		/* memory barrier not required, we hold info->lock */
		__set_current_state(TASK_INTERRUPTIBLE);

		spin_unlock(&info->lock);
		time = schedule_hrtimeout_range_clock(timeout, 0,
			HRTIMER_MODE_ABS, CLOCK_REALTIME);

		if (READ_ONCE(ewp->state) == STATE_READY) {
			/* see MQ_BARRIER for purpose/pairing */
			smp_acquire__after_ctrl_dep();
			retval = 0;
			goto out;
		}
		spin_lock(&info->lock);

		/* we hold info->lock, so no memory barrier required */
		if (READ_ONCE(ewp->state) == STATE_READY) {
			retval = 0;
			goto out_unlock;
		}
		if (signal_pending(current)) {
			retval = -ERESTARTSYS;
			break;
		}
		if (time == 0) {
			retval = -ETIMEDOUT;
			break;
		}
	}
	list_del(&ewp->list);
out_unlock:
	spin_unlock(&info->lock);
out:
	return retval;
}

/*
 * Returns waiting task that should be serviced first or NULL if none exists
 */
static struct ext_wait_queue *wq_get_first_waiter(
		struct mqueue_inode_info *info, int sr)
{
	struct list_head *ptr;

	ptr = info->e_wait_q[sr].list.prev;
	if (ptr == &info->e_wait_q[sr].list)
		return NULL;
	return list_entry(ptr, struct ext_wait_queue, list);
}


static inline void set_cookie(struct sk_buff *skb, char code)
{
	((char *)skb->data)[NOTIFY_COOKIE_LEN-1] = code;
}

/*
 * The next function is only to split too long sys_mq_timedsend
 */
static void __do_notify(struct mqueue_inode_info *info)
{
	/* notification
	 * invoked when there is registered process and there isn't process
	 * waiting synchronously for message AND state of queue changed from
	 * empty to not empty. Here we are sure that no one is waiting
	 * synchronously. */
	if (info->notify_owner &&
	    info->attr.mq_curmsgs == 1) {
		switch (info->notify.sigev_notify) {
		case SIGEV_NONE:
			break;
		case SIGEV_SIGNAL: {
			struct kernel_siginfo sig_i;
			struct task_struct *task;

			/* do_mq_notify() accepts sigev_signo == 0, why?? */
			if (!info->notify.sigev_signo)
				break;

			clear_siginfo(&sig_i);
			sig_i.si_signo = info->notify.sigev_signo;
			sig_i.si_errno = 0;
			sig_i.si_code = SI_MESGQ;
			sig_i.si_value = info->notify.sigev_value;
			rcu_read_lock();
			/* map current pid/uid into info->owner's namespaces */
			sig_i.si_pid = task_tgid_nr_ns(current,
						ns_of_pid(info->notify_owner));
			sig_i.si_uid = from_kuid_munged(info->notify_user_ns,
						current_uid());
			/*
			 * We can't use kill_pid_info(), this signal should
			 * bypass check_kill_permission(). It is from kernel
			 * but si_fromuser() can't know this.
			 * We do check the self_exec_id, to avoid sending
			 * signals to programs that don't expect them.
			 */
			task = pid_task(info->notify_owner, PIDTYPE_TGID);
			if (task && task->self_exec_id ==
						info->notify_self_exec_id) {
				do_send_sig_info(info->notify.sigev_signo,
						&sig_i, task, PIDTYPE_TGID);
			}
			rcu_read_unlock();
			break;
		}
		case SIGEV_THREAD:
			set_cookie(info->notify_cookie, NOTIFY_WOKENUP);
			netlink_sendskb(info->notify_sock, info->notify_cookie);
			break;
		}
		/* after notification unregisters process */
		put_pid(info->notify_owner);
		put_user_ns(info->notify_user_ns);
		info->notify_owner = NULL;
		info->notify_user_ns = NULL;
	}
	wake_up(&info->wait_q);
}

static int prepare_timeout(const struct __kernel_timespec __user *u_abs_timeout,
			   struct timespec64 *ts)
{
	if (get_timespec64(ts, u_abs_timeout))
		return -EFAULT;
	if (!timespec64_valid(ts))
		return -EINVAL;
	return 0;
}

static void remove_notification(struct mqueue_inode_info *info)
{
	if (info->notify_owner != NULL &&
	    info->notify.sigev_notify == SIGEV_THREAD) {
		set_cookie(info->notify_cookie, NOTIFY_REMOVED);
		netlink_sendskb(info->notify_sock, info->notify_cookie);
	}
	put_pid(info->notify_owner);
	put_user_ns(info->notify_user_ns);
	info->notify_owner = NULL;
	info->notify_user_ns = NULL;
}

static int prepare_open(struct dentry *dentry, int oflag, int ro,
			umode_t mode, struct filename *name,
			struct mq_attr *attr)
{
	static const int oflag2acc[O_ACCMODE] = { MAY_READ, MAY_WRITE,
						  MAY_READ | MAY_WRITE };
	int acc;

	if (d_really_is_negative(dentry)) {
		if (!(oflag & O_CREAT))
			return -ENOENT;
		if (ro)
			return ro;
		audit_inode_parent_hidden(name, dentry->d_parent);
		return vfs_mkobj(dentry, mode & ~current_umask(),
				  mqueue_create_attr, attr);
	}
	/* it already existed */
	audit_inode(name, dentry, 0);
	if ((oflag & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL))
		return -EEXIST;
	if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY))
		return -EINVAL;
	acc = oflag2acc[oflag & O_ACCMODE];
	return inode_permission(&init_user_ns, d_inode(dentry), acc);
}

static int do_mq_open(const char __user *u_name, int oflag, umode_t mode,
		      struct mq_attr *attr)
{
	struct vfsmount *mnt = current->nsproxy->ipc_ns->mq_mnt;
	struct dentry *root = mnt->mnt_root;
	struct filename *name;
	struct path path;
	int fd, error;
	int ro;

	audit_mq_open(oflag, mode, attr);

	if (IS_ERR(name = getname(u_name)))
		return PTR_ERR(name);

	fd = get_unused_fd_flags(O_CLOEXEC);
	if (fd < 0)
		goto out_putname;

	ro = mnt_want_write(mnt);	/* we'll drop it in any case */
	inode_lock(d_inode(root));
	path.dentry = lookup_one_len(name->name, root, strlen(name->name));
	if (IS_ERR(path.dentry)) {
		error = PTR_ERR(path.dentry);
		goto out_putfd;
	}
	path.mnt = mntget(mnt);
	error = prepare_open(path.dentry, oflag, ro, mode, name, attr);
	if (!error) {
		struct file *file = dentry_open(&path, oflag, current_cred());
		if (!IS_ERR(file))
			fd_install(fd, file);
		else
			error = PTR_ERR(file);
	}
	path_put(&path);
out_putfd:
	if (error) {
		put_unused_fd(fd);
		fd = error;
	}
	inode_unlock(d_inode(root));
	if (!ro)
		mnt_drop_write(mnt);
out_putname:
	putname(name);
	return fd;
}

SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, umode_t, mode,
		struct mq_attr __user *, u_attr)
{
	struct mq_attr attr;
	if (u_attr && copy_from_user(&attr, u_attr, sizeof(struct mq_attr)))
		return -EFAULT;

	return do_mq_open(u_name, oflag, mode, u_attr ? &attr : NULL);
}

SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name)
{
	int err;
	struct filename *name;
	struct dentry *dentry;
	struct inode *inode = NULL;
	struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns;
	struct vfsmount *mnt = ipc_ns->mq_mnt;

	name = getname(u_name);
	if (IS_ERR(name))
		return PTR_ERR(name);

	audit_inode_parent_hidden(name, mnt->mnt_root);
	err = mnt_want_write(mnt);
	if (err)
		goto out_name;
	inode_lock_nested(d_inode(mnt->mnt_root), I_MUTEX_PARENT);
	dentry = lookup_one_len(name->name, mnt->mnt_root,
				strlen(name->name));
	if (IS_ERR(dentry)) {
		err = PTR_ERR(dentry);
		goto out_unlock;
	}

	inode = d_inode(dentry);
	if (!inode) {
		err = -ENOENT;
	} else {
		ihold(inode);
		err = vfs_unlink(&init_user_ns, d_inode(dentry->d_parent),
				 dentry, NULL);
	}
	dput(dentry);

out_unlock:
	inode_unlock(d_inode(mnt->mnt_root));
	if (inode)
		iput(inode);
	mnt_drop_write(mnt);
out_name:
	putname(name);

	return err;
}

/* Pipelined send and receive functions.
 *
 * If a receiver finds no waiting message, then it registers itself in the
 * list of waiting receivers. A sender checks that list before adding the new
 * message into the message array. If there is a waiting receiver, then it
 * bypasses the message array and directly hands the message over to the
 * receiver. The receiver accepts the message and returns without grabbing the
 * queue spinlock:
 *
 * - Set pointer to message.
 * - Queue the receiver task for later wakeup (without the info->lock).
 * - Update its state to STATE_READY. Now the receiver can continue.
 * - Wake up the process after the lock is dropped. Should the process wake up
 *   before this wakeup (due to a timeout or a signal) it will either see
 *   STATE_READY and continue or acquire the lock to check the state again.
 *
 * The same algorithm is used for senders.
 */

static inline void __pipelined_op(struct wake_q_head *wake_q,
				  struct mqueue_inode_info *info,
				  struct ext_wait_queue *this)
{
	struct task_struct *task;

	list_del(&this->list);
	task = get_task_struct(this->task);

	/* see MQ_BARRIER for purpose/pairing */
	smp_store_release(&this->state, STATE_READY);
	wake_q_add_safe(wake_q, task);
}

/* pipelined_send() - send a message directly to the task waiting in
 * sys_mq_timedreceive() (without inserting message into a queue).
 */
static inline void pipelined_send(struct wake_q_head *wake_q,
				  struct mqueue_inode_info *info,
				  struct msg_msg *message,
				  struct ext_wait_queue *receiver)
{
	receiver->msg = message;
	__pipelined_op(wake_q, info, receiver);
}

/* pipelined_receive() - if there is task waiting in sys_mq_timedsend()
 * gets its message and put to the queue (we have one free place for sure). */
static inline void pipelined_receive(struct wake_q_head *wake_q,
				     struct mqueue_inode_info *info)
{
	struct ext_wait_queue *sender = wq_get_first_waiter(info, SEND);

	if (!sender) {
		/* for poll */
		wake_up_interruptible(&info->wait_q);
		return;
	}
	if (msg_insert(sender->msg, info))
		return;

	__pipelined_op(wake_q, info, sender);
}

static int do_mq_timedsend(mqd_t mqdes, const char __user *u_msg_ptr,
		size_t msg_len, unsigned int msg_prio,
		struct timespec64 *ts)
{
	struct fd f;
	struct inode *inode;
	struct ext_wait_queue wait;
	struct ext_wait_queue *receiver;
	struct msg_msg *msg_ptr;
	struct mqueue_inode_info *info;
	ktime_t expires, *timeout = NULL;
	struct posix_msg_tree_node *new_leaf = NULL;
	int ret = 0;
	DEFINE_WAKE_Q(wake_q);

	if (unlikely(msg_prio >= (unsigned long) MQ_PRIO_MAX))
		return -EINVAL;

	if (ts) {
		expires = timespec64_to_ktime(*ts);
		timeout = &expires;
	}

	audit_mq_sendrecv(mqdes, msg_len, msg_prio, ts);

	f = fdget(mqdes);
	if (unlikely(!f.file)) {
		ret = -EBADF;
		goto out;
	}

	inode = file_inode(f.file);
	if (unlikely(f.file->f_op != &mqueue_file_operations)) {
		ret = -EBADF;
		goto out_fput;
	}
	info = MQUEUE_I(inode);
	audit_file(f.file);

	if (unlikely(!(f.file->f_mode & FMODE_WRITE))) {
		ret = -EBADF;
		goto out_fput;
	}

	if (unlikely(msg_len > info->attr.mq_msgsize)) {
		ret = -EMSGSIZE;
		goto out_fput;
	}

	/* First try to allocate memory, before doing anything with
	 * existing queues. */
	msg_ptr = load_msg(u_msg_ptr, msg_len);
	if (IS_ERR(msg_ptr)) {
		ret = PTR_ERR(msg_ptr);
		goto out_fput;
	}
	msg_ptr->m_ts = msg_len;
	msg_ptr->m_type = msg_prio;

	/*
	 * msg_insert really wants us to have a valid, spare node struct so
	 * it doesn't have to kmalloc a GFP_ATOMIC allocation, but it will
	 * fall back to that if necessary.
	 */
	if (!info->node_cache)
		new_leaf = kmalloc(sizeof(*new_leaf), GFP_KERNEL);

	spin_lock(&info->lock);

	if (!info->node_cache && new_leaf) {
		/* Save our speculative allocation into the cache */
		INIT_LIST_HEAD(&new_leaf->msg_list);
		info->node_cache = new_leaf;
		new_leaf = NULL;
	} else {
		kfree(new_leaf);
	}

	if (info->attr.mq_curmsgs == info->attr.mq_maxmsg) {
		if (f.file->f_flags & O_NONBLOCK) {
			ret = -EAGAIN;
		} else {
			wait.task = current;
			wait.msg = (void *) msg_ptr;

			/* memory barrier not required, we hold info->lock */
			WRITE_ONCE(wait.state, STATE_NONE);
			ret = wq_sleep(info, SEND, timeout, &wait);
			/*
			 * wq_sleep must be called with info->lock held, and
			 * returns with the lock released
			 */
			goto out_free;
		}
	} else {
		receiver = wq_get_first_waiter(info, RECV);
		if (receiver) {
			pipelined_send(&wake_q, info, msg_ptr, receiver);
		} else {
			/* adds message to the queue */
			ret = msg_insert(msg_ptr, info);
			if (ret)
				goto out_unlock;
			__do_notify(info);
		}
		inode->i_atime = inode->i_mtime = inode->i_ctime =
				current_time(inode);
	}
out_unlock:
	spin_unlock(&info->lock);
	wake_up_q(&wake_q);
out_free:
	if (ret)
		free_msg(msg_ptr);
out_fput:
	fdput(f);
out:
	return ret;
}

static int do_mq_timedreceive(mqd_t mqdes, char __user *u_msg_ptr,
		size_t msg_len, unsigned int __user *u_msg_prio,
		struct timespec64 *ts)
{
	ssize_t ret;
	struct msg_msg *msg_ptr;
	struct fd f;
	struct inode *inode;
	struct mqueue_inode_info *info;
	struct ext_wait_queue wait;
	ktime_t expires, *timeout = NULL;
	struct posix_msg_tree_node *new_leaf = NULL;

	if (ts) {
		expires = timespec64_to_ktime(*ts);
		timeout = &expires;
	}

	audit_mq_sendrecv(mqdes, msg_len, 0, ts);

	f = fdget(mqdes);
	if (unlikely(!f.file)) {
		ret = -EBADF;
		goto out;
	}

	inode = file_inode(f.file);
	if (unlikely(f.file->f_op != &mqueue_file_operations)) {
		ret = -EBADF;
		goto out_fput;
	}
	info = MQUEUE_I(inode);
	audit_file(f.file);

	if (unlikely(!(f.file->f_mode & FMODE_READ))) {
		ret = -EBADF;
		goto out_fput;
	}

	/* checks if buffer is big enough */
	if (unlikely(msg_len < info->attr.mq_msgsize)) {
		ret = -EMSGSIZE;
		goto out_fput;
	}

	/*
	 * msg_insert really wants us to have a valid, spare node struct so
	 * it doesn't have to kmalloc a GFP_ATOMIC allocation, but it will
	 * fall back to that if necessary.
	 */
	if (!info->node_cache)
		new_leaf = kmalloc(sizeof(*new_leaf), GFP_KERNEL);

	spin_lock(&info->lock);

	if (!info->node_cache && new_leaf) {
		/* Save our speculative allocation into the cache */
		INIT_LIST_HEAD(&new_leaf->msg_list);
		info->node_cache = new_leaf;
	} else {
		kfree(new_leaf);
	}

	if (info->attr.mq_curmsgs == 0) {
		if (f.file->f_flags & O_NONBLOCK) {
			spin_unlock(&info->lock);
			ret = -EAGAIN;
		} else {
			wait.task = current;

			/* memory barrier not required, we hold info->lock */
			WRITE_ONCE(wait.state, STATE_NONE);
			ret = wq_sleep(info, RECV, timeout, &wait);
			msg_ptr = wait.msg;
		}
	} else {
		DEFINE_WAKE_Q(wake_q);

		msg_ptr = msg_get(info);

		inode->i_atime = inode->i_mtime = inode->i_ctime =
				current_time(inode);

		/* There is now free space in queue. */
		pipelined_receive(&wake_q, info);
		spin_unlock(&info->lock);
		wake_up_q(&wake_q);
		ret = 0;
	}
	if (ret == 0) {
		ret = msg_ptr->m_ts;

		if ((u_msg_prio && put_user(msg_ptr->m_type, u_msg_prio)) ||
			store_msg(u_msg_ptr, msg_ptr, msg_ptr->m_ts)) {
			ret = -EFAULT;
		}
		free_msg(msg_ptr);
	}
out_fput:
	fdput(f);
out:
	return ret;
}

SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr,
		size_t, msg_len, unsigned int, msg_prio,
		const struct __kernel_timespec __user *, u_abs_timeout)
{
	struct timespec64 ts, *p = NULL;
	if (u_abs_timeout) {
		int res = prepare_timeout(u_abs_timeout, &ts);
		if (res)
			return res;
		p = &ts;
	}
	return do_mq_timedsend(mqdes, u_msg_ptr, msg_len, msg_prio, p);
}

SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr,
		size_t, msg_len, unsigned int __user *, u_msg_prio,
		const struct __kernel_timespec __user *, u_abs_timeout)
{
	struct timespec64 ts, *p = NULL;
	if (u_abs_timeout) {
		int res = prepare_timeout(u_abs_timeout, &ts);
		if (res)
			return res;
		p = &ts;
	}
	return do_mq_timedreceive(mqdes, u_msg_ptr, msg_len, u_msg_prio, p);
}

/*
 * Notes: the case when user wants us to deregister (with NULL as pointer)
 * and he isn't currently owner of notification, will be silently discarded.
 * It isn't explicitly defined in the POSIX.
 */
static int do_mq_notify(mqd_t mqdes, const struct sigevent *notification)
{
	int ret;
	struct fd f;
	struct sock *sock;
	struct inode *inode;
	struct mqueue_inode_info *info;
	struct sk_buff *nc;

	audit_mq_notify(mqdes, notification);

	nc = NULL;
	sock = NULL;
	if (notification != NULL) {
		if (unlikely(notification->sigev_notify != SIGEV_NONE &&
			     notification->sigev_notify != SIGEV_SIGNAL &&
			     notification->sigev_notify != SIGEV_THREAD))
			return -EINVAL;
		if (notification->sigev_notify == SIGEV_SIGNAL &&
			!valid_signal(notification->sigev_signo)) {
			return -EINVAL;
		}
		if (notification->sigev_notify == SIGEV_THREAD) {
			long timeo;

			/* create the notify skb */
			nc = alloc_skb(NOTIFY_COOKIE_LEN, GFP_KERNEL);
			if (!nc)
				return -ENOMEM;

			if (copy_from_user(nc->data,
					notification->sigev_value.sival_ptr,
					NOTIFY_COOKIE_LEN)) {
				ret = -EFAULT;
				goto free_skb;
			}

			/* TODO: add a header? */
			skb_put(nc, NOTIFY_COOKIE_LEN);
			/* and attach it to the socket */
retry:
			f = fdget(notification->sigev_signo);
			if (!f.file) {
				ret = -EBADF;
				goto out;
			}
			sock = netlink_getsockbyfilp(f.file);
			fdput(f);
			if (IS_ERR(sock)) {
				ret = PTR_ERR(sock);
				goto free_skb;
			}

			timeo = MAX_SCHEDULE_TIMEOUT;
			ret = netlink_attachskb(sock, nc, &timeo, NULL);
			if (ret == 1) {
				sock = NULL;
				goto retry;
			}
			if (ret)
				return ret;
		}
	}

	f = fdget(mqdes);
	if (!f.file) {
		ret = -EBADF;
		goto out;
	}

	inode = file_inode(f.file);
	if (unlikely(f.file->f_op != &mqueue_file_operations)) {
		ret = -EBADF;
		goto out_fput;
	}
	info = MQUEUE_I(inode);

	ret = 0;
	spin_lock(&info->lock);
	if (notification == NULL) {
		if (info->notify_owner == task_tgid(current)) {
			remove_notification(info);
			inode->i_atime = inode->i_ctime = current_time(inode);
		}
	} else if (info->notify_owner != NULL) {
		ret = -EBUSY;
	} else {
		switch (notification->sigev_notify) {
		case SIGEV_NONE:
			info->notify.sigev_notify = SIGEV_NONE;
			break;
		case SIGEV_THREAD:
			info->notify_sock = sock;
			info->notify_cookie = nc;
			sock = NULL;
			nc = NULL;
			info->notify.sigev_notify = SIGEV_THREAD;
			break;
		case SIGEV_SIGNAL:
			info->notify.sigev_signo = notification->sigev_signo;
			info->notify.sigev_value = notification->sigev_value;
			info->notify.sigev_notify = SIGEV_SIGNAL;
			info->notify_self_exec_id = current->self_exec_id;
			break;
		}

		info->notify_owner = get_pid(task_tgid(current));
		info->notify_user_ns = get_user_ns(current_user_ns());
		inode->i_atime = inode->i_ctime = current_time(inode);
	}
	spin_unlock(&info->lock);
out_fput:
	fdput(f);
out:
	if (sock)
		netlink_detachskb(sock, nc);
	else
free_skb:
		dev_kfree_skb(nc);

	return ret;
}

SYSCALL_DEFINE2(mq_notify, mqd_t, mqdes,
		const struct sigevent __user *, u_notification)
{
	struct sigevent n, *p = NULL;
	if (u_notification) {
		if (copy_from_user(&n, u_notification, sizeof(struct sigevent)))
			return -EFAULT;
		p = &n;
	}
	return do_mq_notify(mqdes, p);
}

static int do_mq_getsetattr(int mqdes, struct mq_attr *new, struct mq_attr *old)
{
	struct fd f;
	struct inode *inode;
	struct mqueue_inode_info *info;

	if (new && (new->mq_flags & (~O_NONBLOCK)))
		return -EINVAL;

	f = fdget(mqdes);
	if (!f.file)
		return -EBADF;

	if (unlikely(f.file->f_op != &mqueue_file_operations)) {
		fdput(f);
		return -EBADF;
	}

	inode = file_inode(f.file);
	info = MQUEUE_I(inode);

	spin_lock(&info->lock);

	if (old) {
		*old = info->attr;
		old->mq_flags = f.file->f_flags & O_NONBLOCK;
	}
	if (new) {
		audit_mq_getsetattr(mqdes, new);
		spin_lock(&f.file->f_lock);
		if (new->mq_flags & O_NONBLOCK)
			f.file->f_flags |= O_NONBLOCK;
		else
			f.file->f_flags &= ~O_NONBLOCK;
		spin_unlock(&f.file->f_lock);

		inode->i_atime = inode->i_ctime = current_time(inode);
	}

	spin_unlock(&info->lock);
	fdput(f);
	return 0;
}

SYSCALL_DEFINE3(mq_getsetattr, mqd_t, mqdes,
		const struct mq_attr __user *, u_mqstat,
		struct mq_attr __user *, u_omqstat)
{
	int ret;
	struct mq_attr mqstat, omqstat;
	struct mq_attr *new = NULL, *old = NULL;

	if (u_mqstat) {
		new = &mqstat;
		if (copy_from_user(new, u_mqstat, sizeof(struct mq_attr)))
			return -EFAULT;
	}
	if (u_omqstat)
		old = &omqstat;

	ret = do_mq_getsetattr(mqdes, new, old);
	if (ret || !old)
		return ret;

	if (copy_to_user(u_omqstat, old, sizeof(struct mq_attr)))
		return -EFAULT;
	return 0;
}

#ifdef CONFIG_COMPAT

struct compat_mq_attr {
	compat_long_t mq_flags;      /* message queue flags		     */
	compat_long_t mq_maxmsg;     /* maximum number of messages	     */
	compat_long_t mq_msgsize;    /* maximum message size		     */
	compat_long_t mq_curmsgs;    /* number of messages currently queued  */
	compat_long_t __reserved[4]; /* ignored for input, zeroed for output */
};

static inline int get_compat_mq_attr(struct mq_attr *attr,
			const struct compat_mq_attr __user *uattr)
{
	struct compat_mq_attr v;

	if (copy_from_user(&v, uattr, sizeof(*uattr)))
		return -EFAULT;

	memset(attr, 0, sizeof(*attr));
	attr->mq_flags = v.mq_flags;
	attr->mq_maxmsg = v.mq_maxmsg;
	attr->mq_msgsize = v.mq_msgsize;
	attr->mq_curmsgs = v.mq_curmsgs;
	return 0;
}

static inline int put_compat_mq_attr(const struct mq_attr *attr,
			struct compat_mq_attr __user *uattr)
{
	struct compat_mq_attr v;

	memset(&v, 0, sizeof(v));
	v.mq_flags = attr->mq_flags;
	v.mq_maxmsg = attr->mq_maxmsg;
	v.mq_msgsize = attr->mq_msgsize;
	v.mq_curmsgs = attr->mq_curmsgs;
	if (copy_to_user(uattr, &v, sizeof(*uattr)))
		return -EFAULT;
	return 0;
}

COMPAT_SYSCALL_DEFINE4(mq_open, const char __user *, u_name,
		       int, oflag, compat_mode_t, mode,
		       struct compat_mq_attr __user *, u_attr)
{
	struct mq_attr attr, *p = NULL;
	if (u_attr && oflag & O_CREAT) {
		p = &attr;
		if (get_compat_mq_attr(&attr, u_attr))
			return -EFAULT;
	}
	return do_mq_open(u_name, oflag, mode, p);
}

COMPAT_SYSCALL_DEFINE2(mq_notify, mqd_t, mqdes,
		       const struct compat_sigevent __user *, u_notification)
{
	struct sigevent n, *p = NULL;
	if (u_notification) {
		if (get_compat_sigevent(&n, u_notification))
			return -EFAULT;
		if (n.sigev_notify == SIGEV_THREAD)
			n.sigev_value.sival_ptr = compat_ptr(n.sigev_value.sival_int);
		p = &n;
	}
	return do_mq_notify(mqdes, p);
}

COMPAT_SYSCALL_DEFINE3(mq_getsetattr, mqd_t, mqdes,
		       const struct compat_mq_attr __user *, u_mqstat,
		       struct compat_mq_attr __user *, u_omqstat)
{
	int ret;
	struct mq_attr mqstat, omqstat;
	struct mq_attr *new = NULL, *old = NULL;

	if (u_mqstat) {
		new = &mqstat;
		if (get_compat_mq_attr(new, u_mqstat))
			return -EFAULT;
	}
	if (u_omqstat)
		old = &omqstat;

	ret = do_mq_getsetattr(mqdes, new, old);
	if (ret || !old)
		return ret;

	if (put_compat_mq_attr(old, u_omqstat))
		return -EFAULT;
	return 0;
}
#endif

#ifdef CONFIG_COMPAT_32BIT_TIME
static int compat_prepare_timeout(const struct old_timespec32 __user *p,
				   struct timespec64 *ts)
{
	if (get_old_timespec32(ts, p))
		return -EFAULT;
	if (!timespec64_valid(ts))
		return -EINVAL;
	return 0;
}

SYSCALL_DEFINE5(mq_timedsend_time32, mqd_t, mqdes,
		const char __user *, u_msg_ptr,
		unsigned int, msg_len, unsigned int, msg_prio,
		const struct old_timespec32 __user *, u_abs_timeout)
{
	struct timespec64 ts, *p = NULL;
	if (u_abs_timeout) {
		int res = compat_prepare_timeout(u_abs_timeout, &ts);
		if (res)
			return res;
		p = &ts;
	}
	return do_mq_timedsend(mqdes, u_msg_ptr, msg_len, msg_prio, p);
}

SYSCALL_DEFINE5(mq_timedreceive_time32, mqd_t, mqdes,
		char __user *, u_msg_ptr,
		unsigned int, msg_len, unsigned int __user *, u_msg_prio,
		const struct old_timespec32 __user *, u_abs_timeout)
{
	struct timespec64 ts, *p = NULL;
	if (u_abs_timeout) {
		int res = compat_prepare_timeout(u_abs_timeout, &ts);
		if (res)
			return res;
		p = &ts;
	}
	return do_mq_timedreceive(mqdes, u_msg_ptr, msg_len, u_msg_prio, p);
}
#endif

static const struct inode_operations mqueue_dir_inode_operations = {
	.lookup = simple_lookup,
	.create = mqueue_create,
	.unlink = mqueue_unlink,
};

static const struct file_operations mqueue_file_operations = {
	.flush = mqueue_flush_file,
	.poll = mqueue_poll_file,
	.read = mqueue_read_file,
	.llseek = default_llseek,
};

static const struct super_operations mqueue_super_ops = {
	.alloc_inode = mqueue_alloc_inode,
	.free_inode = mqueue_free_inode,
	.evict_inode = mqueue_evict_inode,
	.statfs = simple_statfs,
};

static const struct fs_context_operations mqueue_fs_context_ops = {
	.free		= mqueue_fs_context_free,
	.get_tree	= mqueue_get_tree,
};

static struct file_system_type mqueue_fs_type = {
	.name			= "mqueue",
	.init_fs_context	= mqueue_init_fs_context,
	.kill_sb		= kill_litter_super,
	.fs_flags		= FS_USERNS_MOUNT,
};

int mq_init_ns(struct ipc_namespace *ns)
{
	struct vfsmount *m;

	ns->mq_queues_count  = 0;
	ns->mq_queues_max    = DFLT_QUEUESMAX;
	ns->mq_msg_max       = DFLT_MSGMAX;
	ns->mq_msgsize_max   = DFLT_MSGSIZEMAX;
	ns->mq_msg_default   = DFLT_MSG;
	ns->mq_msgsize_default  = DFLT_MSGSIZE;

	m = mq_create_mount(ns);
	if (IS_ERR(m))
		return PTR_ERR(m);
	ns->mq_mnt = m;
	return 0;
}

void mq_clear_sbinfo(struct ipc_namespace *ns)
{
	ns->mq_mnt->mnt_sb->s_fs_info = NULL;
}

void mq_put_mnt(struct ipc_namespace *ns)
{
	kern_unmount(ns->mq_mnt);
}

static int __init init_mqueue_fs(void)
{
	int error;

	mqueue_inode_cachep = kmem_cache_create("mqueue_inode_cache",
				sizeof(struct mqueue_inode_info), 0,
				SLAB_HWCACHE_ALIGN|SLAB_ACCOUNT, init_once);
	if (mqueue_inode_cachep == NULL)
		return -ENOMEM;

	/* ignore failures - they are not fatal */
	mq_sysctl_table = mq_register_sysctl_table();

	error = register_filesystem(&mqueue_fs_type);
	if (error)
		goto out_sysctl;

	spin_lock_init(&mq_lock);

	error = mq_init_ns(&init_ipc_ns);
	if (error)
		goto out_filesystem;

	return 0;

out_filesystem:
	unregister_filesystem(&mqueue_fs_type);
out_sysctl:
	if (mq_sysctl_table)
		unregister_sysctl_table(mq_sysctl_table);
	kmem_cache_destroy(mqueue_inode_cachep);
	return error;
}

device_initcall(init_mqueue_fs);
Documentation/PCI/index.rst?id2=8bc22a2f1bf0f402029087fcb53130233a544fed'>Documentation/PCI/index.rst2
-rw-r--r--Documentation/PCI/msi-howto.rst2
-rw-r--r--Documentation/PCI/pci-error-recovery.rst58
-rw-r--r--Documentation/PCI/pci.rst4
-rw-r--r--Documentation/PCI/pcieaer-howto.rst104
-rw-r--r--Documentation/PCI/pciebus-howto.rst16
-rw-r--r--Documentation/PCI/tph.rst132
-rw-r--r--Documentation/RAS/ras.rst26
-rw-r--r--Documentation/RCU/Design/Data-Structures/Data-Structures.rst61
-rw-r--r--Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst10
-rw-r--r--Documentation/RCU/Design/Memory-Ordering/TreeRCU-dyntick.svg8
-rw-r--r--Documentation/RCU/Design/Memory-Ordering/TreeRCU-gp-fqs.svg8
-rw-r--r--Documentation/RCU/Design/Memory-Ordering/TreeRCU-gp.svg8
-rw-r--r--Documentation/RCU/Design/Memory-Ordering/TreeRCU-hotplug.svg4
-rw-r--r--Documentation/RCU/Design/Requirements/Requirements.rst176
-rw-r--r--Documentation/RCU/RTFP.txt6
-rw-r--r--Documentation/RCU/checklist.rst106
-rw-r--r--Documentation/RCU/index.rst6
-rw-r--r--Documentation/RCU/listRCU.rst10
-rw-r--r--Documentation/RCU/lockdep.rst2
-rw-r--r--Documentation/RCU/rcu_dereference.rst5
-rw-r--r--Documentation/RCU/rcubarrier.rst5
-rw-r--r--Documentation/RCU/stallwarn.rst11
-rw-r--r--Documentation/RCU/torture.rst6
-rw-r--r--Documentation/RCU/whatisRCU.rst221
-rw-r--r--Documentation/accel/amdxdna/amdnpu.rst281
-rw-r--r--Documentation/accel/amdxdna/index.rst11
-rw-r--r--Documentation/accel/index.rst2
-rw-r--r--Documentation/accel/qaic/aic080.rst14
-rw-r--r--Documentation/accel/qaic/aic100.rst27
-rw-r--r--Documentation/accel/qaic/index.rst1
-rw-r--r--Documentation/accel/qaic/qaic.rst16
-rw-r--r--Documentation/accel/rocket/index.rst19
-rw-r--r--Documentation/accounting/delay-accounting.rst123
-rw-r--r--Documentation/accounting/taskstats-struct.rst2
-rw-r--r--Documentation/accounting/taskstats.rst54
-rw-r--r--Documentation/admin-guide/LSM/SELinux.rst11
-rw-r--r--Documentation/admin-guide/LSM/SafeSetID.rst2
-rw-r--r--Documentation/admin-guide/LSM/Smack.rst16
-rw-r--r--Documentation/admin-guide/LSM/apparmor.rst7
-rw-r--r--Documentation/admin-guide/LSM/index.rst2
-rw-r--r--Documentation/admin-guide/LSM/ipe.rst835
-rw-r--r--Documentation/admin-guide/LSM/landlock.rst158
-rw-r--r--Documentation/admin-guide/LSM/tomoyo.rst35
-rw-r--r--Documentation/admin-guide/RAS/address-translation.rst24
-rw-r--r--Documentation/admin-guide/RAS/error-decoding.rst21
-rw-r--r--Documentation/admin-guide/RAS/index.rst7
-rw-r--r--Documentation/admin-guide/RAS/main.rst1087
-rw-r--r--Documentation/admin-guide/README.rst79
-rw-r--r--Documentation/admin-guide/abi-obsolete-files.rst7
-rw-r--r--Documentation/admin-guide/abi-obsolete.rst6
-rw-r--r--Documentation/admin-guide/abi-removed-files.rst7
-rw-r--r--Documentation/admin-guide/abi-removed.rst6
-rw-r--r--Documentation/admin-guide/abi-stable-files.rst7
-rw-r--r--Documentation/admin-guide/abi-stable.rst6
-rw-r--r--Documentation/admin-guide/abi-testing-files.rst7
-rw-r--r--Documentation/admin-guide/abi-testing.rst6
-rw-r--r--Documentation/admin-guide/abi.rst18
-rw-r--r--Documentation/admin-guide/aoe/udev.txt6
-rw-r--r--Documentation/admin-guide/bcache.rst13
-rw-r--r--Documentation/admin-guide/blockdev/index.rst1
-rw-r--r--Documentation/admin-guide/blockdev/paride.rst2
-rw-r--r--Documentation/admin-guide/blockdev/zoned_loop.rst182
-rw-r--r--Documentation/admin-guide/blockdev/zram.rst133
-rw-r--r--Documentation/admin-guide/bootconfig.rst2
-rw-r--r--Documentation/admin-guide/braille-console.rst4
-rw-r--r--Documentation/admin-guide/bug-bisect.rst219
-rw-r--r--Documentation/admin-guide/bug-hunting.rst30
-rw-r--r--Documentation/admin-guide/cgroup-v1/cgroups.rst4
-rw-r--r--Documentation/admin-guide/cgroup-v1/cpusets.rst11
-rw-r--r--Documentation/admin-guide/cgroup-v1/freezer-subsystem.rst4
-rw-r--r--Documentation/admin-guide/cgroup-v1/hugetlb.rst20
-rw-r--r--Documentation/admin-guide/cgroup-v1/memcg_test.rst2
-rw-r--r--Documentation/admin-guide/cgroup-v1/memory.rst121
-rw-r--r--Documentation/admin-guide/cgroup-v1/pids.rst3
-rw-r--r--Documentation/admin-guide/cgroup-v2.rst456
-rw-r--r--Documentation/admin-guide/cifs/introduction.rst2
-rw-r--r--Documentation/admin-guide/cifs/usage.rst38
-rw-r--r--Documentation/admin-guide/device-mapper/delay.rst41
-rw-r--r--Documentation/admin-guide/device-mapper/dm-crypt.rst31
-rw-r--r--Documentation/admin-guide/device-mapper/dm-integrity.rst5
-rw-r--r--Documentation/admin-guide/device-mapper/dm-pcache.rst202
-rw-r--r--Documentation/admin-guide/device-mapper/index.rst3
-rw-r--r--Documentation/admin-guide/device-mapper/thin-provisioning.rst16
-rw-r--r--Documentation/admin-guide/device-mapper/vdo-design.rst633
-rw-r--r--Documentation/admin-guide/device-mapper/vdo.rst413
-rw-r--r--Documentation/admin-guide/device-mapper/verity.rst20
-rw-r--r--Documentation/admin-guide/dynamic-debug-howto.rst10
-rw-r--r--Documentation/admin-guide/edid.rst35
-rw-r--r--Documentation/admin-guide/efi-stub.rst3
-rw-r--r--Documentation/admin-guide/ext4.rst19
-rw-r--r--Documentation/admin-guide/gpio/gpio-aggregator.rst107
-rw-r--r--Documentation/admin-guide/gpio/gpio-mockup.rst8
-rw-r--r--Documentation/admin-guide/gpio/gpio-sim.rst9
-rw-r--r--Documentation/admin-guide/gpio/gpio-virtuser.rst177
-rw-r--r--Documentation/admin-guide/gpio/index.rst7
-rw-r--r--Documentation/admin-guide/gpio/obsolete.rst13
-rw-r--r--Documentation/admin-guide/highuid.rst80
-rw-r--r--Documentation/admin-guide/hw-vuln/attack_vector_controls.rst236
-rw-r--r--Documentation/admin-guide/hw-vuln/core-scheduling.rst4
-rw-r--r--Documentation/admin-guide/hw-vuln/index.rst6
-rw-r--r--Documentation/admin-guide/hw-vuln/indirect-target-selection.rst168
-rw-r--r--Documentation/admin-guide/hw-vuln/l1d_flush.rst2
-rw-r--r--Documentation/admin-guide/hw-vuln/mds.rst2
-rw-r--r--Documentation/admin-guide/hw-vuln/old_microcode.rst21
-rw-r--r--Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst4
-rw-r--r--Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst96
-rw-r--r--Documentation/admin-guide/hw-vuln/rsb.rst268
-rw-r--r--Documentation/admin-guide/hw-vuln/spectre.rst118
-rw-r--r--Documentation/admin-guide/hw-vuln/srso.rst84
-rw-r--r--Documentation/admin-guide/hw-vuln/vmscape.rst110
-rw-r--r--Documentation/admin-guide/index.rst164
-rw-r--r--Documentation/admin-guide/iostats.rst89
-rw-r--r--Documentation/admin-guide/kdump/kdump.rst74
-rw-r--r--Documentation/admin-guide/kdump/vmcoreinfo.rst16
-rw-r--r--Documentation/admin-guide/kernel-parameters.rst139
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt2224
-rw-r--r--Documentation/admin-guide/kernel-per-CPU-kthreads.rst9
-rw-r--r--Documentation/admin-guide/laptops/alienware-wmi.rst127
-rw-r--r--Documentation/admin-guide/laptops/index.rst3
-rw-r--r--Documentation/admin-guide/laptops/laptop-mode.rst8
-rw-r--r--Documentation/admin-guide/laptops/lg-laptop.rst4
-rw-r--r--Documentation/admin-guide/laptops/samsung-galaxybook.rst174
-rw-r--r--Documentation/admin-guide/laptops/sonypi.rst2
-rw-r--r--Documentation/admin-guide/laptops/thinkpad-acpi.rst11
-rw-r--r--Documentation/admin-guide/laptops/uniwill-laptop.rst60
-rw-r--r--Documentation/admin-guide/md.rst98
-rw-r--r--Documentation/admin-guide/media/building.rst2
-rw-r--r--Documentation/admin-guide/media/c3-isp.dot26
-rw-r--r--Documentation/admin-guide/media/c3-isp.rst101
-rw-r--r--Documentation/admin-guide/media/cec.rst87
-rw-r--r--Documentation/admin-guide/media/em28xx-cardlist.rst8
-rw-r--r--Documentation/admin-guide/media/i2c-cardlist.rst1
-rw-r--r--Documentation/admin-guide/media/imx.rst2
-rw-r--r--Documentation/admin-guide/media/index.rst5
-rw-r--r--Documentation/admin-guide/media/ipu3.rst6
-rw-r--r--Documentation/admin-guide/media/ipu6-isys.rst161
-rw-r--r--Documentation/admin-guide/media/ipu6_isys_graph.svg548
-rw-r--r--Documentation/admin-guide/media/ivtv.rst2
-rw-r--r--Documentation/admin-guide/media/mali-c55-graph.dot19
-rw-r--r--Documentation/admin-guide/media/mali-c55.rst413
-rw-r--r--Documentation/admin-guide/media/mgb4.rst71
-rw-r--r--Documentation/admin-guide/media/omap4_camera.rst62
-rw-r--r--Documentation/admin-guide/media/pci-cardlist.rst1
-rw-r--r--Documentation/admin-guide/media/platform-cardlist.rst2
-rw-r--r--Documentation/admin-guide/media/radio-cardlist.rst1
-rw-r--r--Documentation/admin-guide/media/raspberrypi-pisp-be.dot20
-rw-r--r--Documentation/admin-guide/media/raspberrypi-pisp-be.rst109
-rw-r--r--Documentation/admin-guide/media/raspberrypi-rp1-cfe.dot27
-rw-r--r--Documentation/admin-guide/media/raspberrypi-rp1-cfe.rst78
-rw-r--r--Documentation/admin-guide/media/rkcif-rk3568-vicap.dot8
-rw-r--r--Documentation/admin-guide/media/rkcif.rst79
-rw-r--r--Documentation/admin-guide/media/rkisp1.rst11
-rw-r--r--Documentation/admin-guide/media/saa7134.rst2
-rw-r--r--Documentation/admin-guide/media/si4713.rst6
-rw-r--r--Documentation/admin-guide/media/tuner-cardlist.rst2
-rw-r--r--Documentation/admin-guide/media/v4l-drivers.rst7
-rw-r--r--Documentation/admin-guide/media/visl.rst12
-rw-r--r--Documentation/admin-guide/media/vivid.rst193
-rw-r--r--Documentation/admin-guide/mm/cma_debugfs.rst10
-rw-r--r--Documentation/admin-guide/mm/damon/index.rst12
-rw-r--r--Documentation/admin-guide/mm/damon/lru_sort.rst22
-rw-r--r--Documentation/admin-guide/mm/damon/reclaim.rst49
-rw-r--r--Documentation/admin-guide/mm/damon/start.rst71
-rw-r--r--Documentation/admin-guide/mm/damon/stat.rst86
-rw-r--r--Documentation/admin-guide/mm/damon/usage.rst619
-rw-r--r--Documentation/admin-guide/mm/hugetlbpage.rst17
-rw-r--r--Documentation/admin-guide/mm/index.rst5
-rw-r--r--Documentation/admin-guide/mm/kho.rst115
-rw-r--r--Documentation/admin-guide/mm/ksm.rst2
-rw-r--r--Documentation/admin-guide/mm/memory-hotplug.rst9
-rw-r--r--Documentation/admin-guide/mm/multigen_lru.rst5
-rw-r--r--Documentation/admin-guide/mm/numa_memory_policy.rst9
-rw-r--r--Documentation/admin-guide/mm/pagemap.rst50
-rw-r--r--Documentation/admin-guide/mm/slab.rst469
-rw-r--r--Documentation/admin-guide/mm/swap_numa.rst78
-rw-r--r--Documentation/admin-guide/mm/transhuge.rst347
-rw-r--r--Documentation/admin-guide/mm/zswap.rst68
-rw-r--r--Documentation/admin-guide/namespaces/resource-control.rst24
-rw-r--r--Documentation/admin-guide/nfs/nfsroot.rst2
-rw-r--r--Documentation/admin-guide/nvme-multipath.rst72
-rw-r--r--Documentation/admin-guide/perf/arm-ni.rst17
-rw-r--r--Documentation/admin-guide/perf/dwc_pcie_pmu.rst26
-rw-r--r--Documentation/admin-guide/perf/fujitsu_uncore_pmu.rst115
-rw-r--r--Documentation/admin-guide/perf/hisi-pcie-pmu.rst36
-rw-r--r--Documentation/admin-guide/perf/hisi-pmu.rst63
-rw-r--r--Documentation/admin-guide/perf/hns3-pmu.rst8
-rw-r--r--Documentation/admin-guide/perf/index.rst6
-rw-r--r--Documentation/admin-guide/perf/mrvl-odyssey-ddr-pmu.rst80
-rw-r--r--Documentation/admin-guide/perf/mrvl-odyssey-tad-pmu.rst37
-rw-r--r--Documentation/admin-guide/perf/mrvl-pem-pmu.rst56
-rw-r--r--Documentation/admin-guide/perf/nvidia-pmu.rst52
-rw-r--r--Documentation/admin-guide/perf/qcom_l2_pmu.rst2
-rw-r--r--Documentation/admin-guide/perf/qcom_l3_pmu.rst2
-rw-r--r--Documentation/admin-guide/perf/starfive_starlink_pmu.rst46
-rw-r--r--Documentation/admin-guide/perf/thunderx2-pmu.rst2
-rw-r--r--Documentation/admin-guide/perf/xgene-pmu.rst2
-rw-r--r--Documentation/admin-guide/pm/amd-pstate.rst90
-rw-r--r--Documentation/admin-guide/pm/cpufreq.rst51
-rw-r--r--Documentation/admin-guide/pm/cpuidle.rst92
-rw-r--r--Documentation/admin-guide/pm/intel_idle.rst39
-rw-r--r--Documentation/admin-guide/pm/intel_pstate.rst228
-rw-r--r--Documentation/admin-guide/pm/intel_uncore_frequency_scaling.rst69
-rw-r--r--Documentation/admin-guide/pmf.rst24
-rw-r--r--Documentation/admin-guide/pnp.rst3
-rw-r--r--Documentation/admin-guide/quickly-build-trimmed-linux.rst10
-rw-r--r--Documentation/admin-guide/ramoops.rst13
-rw-r--r--Documentation/admin-guide/ras.rst1219
-rw-r--r--Documentation/admin-guide/reporting-issues.rst10
-rw-r--r--Documentation/admin-guide/reporting-regressions.rst12
-rw-r--r--Documentation/admin-guide/serial-console.rst4
-rw-r--r--Documentation/admin-guide/syscall-user-dispatch.rst23
-rw-r--r--Documentation/admin-guide/sysctl/fs.rst44
-rw-r--r--Documentation/admin-guide/sysctl/index.rst18
-rw-r--r--Documentation/admin-guide/sysctl/kernel.rst160
-rw-r--r--Documentation/admin-guide/sysctl/net.rst39
-rw-r--r--Documentation/admin-guide/sysctl/vm.rst109
-rw-r--r--Documentation/admin-guide/sysrq.rst29
-rw-r--r--Documentation/admin-guide/tainted-kernels.rst11
-rw-r--r--Documentation/admin-guide/thermal/index.rst1
-rw-r--r--Documentation/admin-guide/thermal/intel_thermal_throttle.rst91
-rw-r--r--Documentation/admin-guide/thunderbolt.rst94
-rw-r--r--Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst2222
-rw-r--r--Documentation/admin-guide/workload-tracing.rst14
-rw-r--r--Documentation/admin-guide/xfs.rst117
-rw-r--r--Documentation/arch/arm/mem_alignment.rst2
-rw-r--r--Documentation/arch/arm/stm32/stm32-dma-mdma-chaining.rst4
-rw-r--r--Documentation/arch/arm/stm32/stm32f746-overview.rst2
-rw-r--r--Documentation/arch/arm/stm32/stm32f769-overview.rst2
-rw-r--r--Documentation/arch/arm/stm32/stm32h743-overview.rst2
-rw-r--r--Documentation/arch/arm/stm32/stm32h750-overview.rst2
-rw-r--r--Documentation/arch/arm/stm32/stm32mp13-overview.rst2
-rw-r--r--Documentation/arch/arm/stm32/stm32mp151-overview.rst2
-rw-r--r--Documentation/arch/arm64/amu.rst2
-rw-r--r--Documentation/arch/arm64/arm-cca.rst69
-rw-r--r--Documentation/arch/arm64/asymmetric-32bit.rst10
-rw-r--r--Documentation/arch/arm64/booting.rst155
-rw-r--r--Documentation/arch/arm64/cpu-feature-registers.rst15
-rw-r--r--Documentation/arch/arm64/cpu-hotplug.rst79
-rw-r--r--Documentation/arch/arm64/elf_hwcaps.rst160
-rw-r--r--Documentation/arch/arm64/gcs.rst227
-rw-r--r--Documentation/arch/arm64/index.rst4
-rw-r--r--Documentation/arch/arm64/memory.rst75
-rw-r--r--Documentation/arch/arm64/mops.rst44
-rw-r--r--Documentation/arch/arm64/ptdump.rst2
-rw-r--r--Documentation/arch/arm64/silicon-errata.rst72
-rw-r--r--Documentation/arch/arm64/sme.rst37
-rw-r--r--Documentation/arch/arm64/sve.rst19
-rw-r--r--Documentation/arch/arm64/tagged-pointers.rst11
-rw-r--r--Documentation/arch/loongarch/irq-chip-model.rst96
-rw-r--r--Documentation/arch/m68k/buddha-driver.rst2
-rw-r--r--Documentation/arch/openrisc/openrisc_port.rst18
-rw-r--r--Documentation/arch/powerpc/booting.rst4
-rw-r--r--Documentation/arch/powerpc/cpu_families.rst18
-rw-r--r--Documentation/arch/powerpc/cxl.rst469
-rw-r--r--Documentation/arch/powerpc/cxlflash.rst433
-rw-r--r--Documentation/arch/powerpc/dexcr.rst141
-rw-r--r--Documentation/arch/powerpc/eeh-pci-error-recovery.rst1
-rw-r--r--Documentation/arch/powerpc/elf_hwcaps.rst1
-rw-r--r--Documentation/arch/powerpc/firmware-assisted-dump.rst113
-rw-r--r--Documentation/arch/powerpc/htm.rst104
-rw-r--r--Documentation/arch/powerpc/index.rst4
-rw-r--r--Documentation/arch/powerpc/kvm-nested.rst44
-rw-r--r--Documentation/arch/powerpc/papr_hcalls.rst11
-rw-r--r--Documentation/arch/powerpc/ultravisor.rst2
-rw-r--r--Documentation/arch/powerpc/vpa-dtl.rst156
-rw-r--r--Documentation/arch/riscv/cmodx.rst130
-rw-r--r--Documentation/arch/riscv/hwprobe.rst199
-rw-r--r--Documentation/arch/riscv/index.rst1
-rw-r--r--Documentation/arch/riscv/uabi.rst20
-rw-r--r--Documentation/arch/riscv/vector.rst2
-rw-r--r--Documentation/arch/riscv/vm-layout.rst33
-rw-r--r--Documentation/arch/s390/driver-model.rst23
-rw-r--r--Documentation/arch/s390/index.rst1
-rw-r--r--Documentation/arch/s390/mm.rst111
-rw-r--r--Documentation/arch/s390/s390dbf.rst5
-rw-r--r--Documentation/arch/s390/vfio-ap.rst62
-rw-r--r--Documentation/arch/sparc/oradax/dax-hv-api.txt2
-rw-r--r--Documentation/arch/x86/amd-debugging.rst368
-rw-r--r--Documentation/arch/x86/amd-hfi.rst133
-rw-r--r--Documentation/arch/x86/amd-memory-encryption.rst163
-rw-r--r--Documentation/arch/x86/amd_hsmp.rst104
-rw-r--r--Documentation/arch/x86/boot.rst453
-rw-r--r--Documentation/arch/x86/buslock.rst3
-rw-r--r--Documentation/arch/x86/cpuinfo.rst81
-rw-r--r--Documentation/arch/x86/exception-tables.rst2
-rw-r--r--Documentation/arch/x86/index.rst3
-rw-r--r--Documentation/arch/x86/mds.rst46
-rw-r--r--Documentation/arch/x86/pti.rst6
-rw-r--r--Documentation/arch/x86/resctrl.rst1480
-rw-r--r--Documentation/arch/x86/resume.svg4
-rw-r--r--Documentation/arch/x86/suspend.svg4
-rw-r--r--Documentation/arch/x86/sva.rst4
-rw-r--r--Documentation/arch/x86/tdx.rst14
-rw-r--r--Documentation/arch/x86/topology.rst219
-rw-r--r--Documentation/arch/x86/usb-legacy-support.rst11
-rw-r--r--Documentation/arch/x86/x86_64/5level-paging.rst9
-rw-r--r--Documentation/arch/x86/x86_64/boot-options.rst319
-rw-r--r--Documentation/arch/x86/x86_64/fake-numa-for-cpusets.rst2
-rw-r--r--Documentation/arch/x86/x86_64/fred.rst96
-rw-r--r--Documentation/arch/x86/x86_64/fsgs.rst6
-rw-r--r--Documentation/arch/x86/x86_64/index.rst2
-rw-r--r--Documentation/arch/x86/x86_64/mm.rst37
-rw-r--r--Documentation/arch/x86/x86_64/uefi.rst37
-rw-r--r--Documentation/arch/x86/xstate.rst2
-rw-r--r--Documentation/atomic_t.txt4
-rw-r--r--Documentation/block/bfq-iosched.rst22
-rw-r--r--Documentation/block/cmdline-partition.rst5
-rw-r--r--Documentation/block/data-integrity.rst49
-rw-r--r--Documentation/block/inline-encryption.rst255
-rw-r--r--Documentation/block/ublk.rst167
-rw-r--r--Documentation/block/writeback_cache_control.rst67
-rw-r--r--Documentation/bpf/bpf_devel_QA.rst15
-rw-r--r--Documentation/bpf/bpf_iterators.rst119
-rw-r--r--Documentation/bpf/btf.rst141
-rw-r--r--Documentation/bpf/kfuncs.rst44
-rw-r--r--Documentation/bpf/libbpf/libbpf_overview.rst8
-rw-r--r--Documentation/bpf/libbpf/program_types.rst48
-rw-r--r--Documentation/bpf/map_array.rst5
-rw-r--r--Documentation/bpf/map_hash.rst8
-rw-r--r--Documentation/bpf/map_lpm_trie.rst2
-rw-r--r--Documentation/bpf/map_lru_hash_update.dot6
-rw-r--r--Documentation/bpf/standardization/abi.rst3
-rw-r--r--Documentation/bpf/standardization/instruction-set.rst775
-rw-r--r--Documentation/bpf/verifier.rst264
-rw-r--r--Documentation/cdrom/cdrom-standard.rst5
-rw-r--r--Documentation/cdrom/index.rst1
-rw-r--r--Documentation/cdrom/packet-writing.rst139
-rw-r--r--Documentation/conf.py655
-rw-r--r--Documentation/core-api/assoc_array.rst196
-rw-r--r--Documentation/core-api/cgroup.rst9
-rw-r--r--Documentation/core-api/cleanup.rst8
-rw-r--r--Documentation/core-api/cpu_hotplug.rst12
-rw-r--r--Documentation/core-api/dma-api-howto.rst60
-rw-r--r--Documentation/core-api/dma-api.rst272
-rw-r--r--Documentation/core-api/dma-attributes.rst18
-rw-r--r--Documentation/core-api/entry.rst8
-rw-r--r--Documentation/core-api/floating-point.rst78
-rw-r--r--Documentation/core-api/folio_queue.rst209
-rw-r--r--Documentation/core-api/genericirq.rst2
-rw-r--r--Documentation/core-api/gfp_mask-from-fs-io.rst20
-rw-r--r--Documentation/core-api/index.rst12
-rw-r--r--Documentation/core-api/irq/concepts.rst27
-rw-r--r--Documentation/core-api/irq/irq-affinity.rst6
-rw-r--r--Documentation/core-api/irq/irq-domain.rst235
-rw-r--r--Documentation/core-api/kernel-api.rst24
-rw-r--r--Documentation/core-api/kho/bindings/kho.yaml43
-rw-r--r--Documentation/core-api/kho/bindings/memblock/memblock.yaml39
-rw-r--r--Documentation/core-api/kho/bindings/memblock/reserve-mem.yaml40
-rw-r--r--Documentation/core-api/kho/bindings/sub-fdt.yaml27
-rw-r--r--Documentation/core-api/kho/concepts.rst74
-rw-r--r--Documentation/core-api/kho/fdt.rst80
-rw-r--r--Documentation/core-api/kho/index.rst13
-rw-r--r--Documentation/core-api/kref.rst7
-rw-r--r--Documentation/core-api/list.rst776
-rw-r--r--Documentation/core-api/liveupdate.rst61
-rw-r--r--Documentation/core-api/memory-allocation.rst11
-rw-r--r--Documentation/core-api/memory-hotplug.rst91
-rw-r--r--Documentation/core-api/min_heap.rst302
-rw-r--r--Documentation/core-api/mm-api.rst8
-rw-r--r--Documentation/core-api/packing.rst189
-rw-r--r--Documentation/core-api/parser.rst17
-rw-r--r--Documentation/core-api/pin_user_pages.rst18
-rw-r--r--Documentation/core-api/printk-formats.rst76
-rw-r--r--Documentation/core-api/printk-index.rst4
-rw-r--r--Documentation/core-api/protection-keys.rst38
-rw-r--r--Documentation/core-api/real-time/architecture-porting.rst109
-rw-r--r--Documentation/core-api/real-time/differences.rst242
-rw-r--r--Documentation/core-api/real-time/index.rst16
-rw-r--r--Documentation/core-api/real-time/theory.rst116
-rw-r--r--Documentation/core-api/refcount-vs-atomic.rst37
-rw-r--r--Documentation/core-api/swiotlb.rst321
-rw-r--r--Documentation/core-api/symbol-namespaces.rst83
-rw-r--r--Documentation/core-api/this_cpu_ops.rst22
-rw-r--r--Documentation/core-api/unaligned-memory-access.rst2
-rw-r--r--Documentation/core-api/union_find.rst106
-rw-r--r--Documentation/core-api/workqueue.rst64
-rw-r--r--Documentation/core-api/xarray.rst38
-rw-r--r--Documentation/cpu-freq/cpu-drivers.rst3
-rw-r--r--Documentation/crypto/api-aead.rst3
-rw-r--r--Documentation/crypto/api-akcipher.rst7
-rw-r--r--Documentation/crypto/api-digest.rst3
-rw-r--r--Documentation/crypto/api-kpp.rst3
-rw-r--r--Documentation/crypto/api-rng.rst3
-rw-r--r--Documentation/crypto/api-sig.rst18
-rw-r--r--Documentation/crypto/api-skcipher.rst3
-rw-r--r--Documentation/crypto/api.rst1
-rw-r--r--Documentation/crypto/architecture.rst4
-rw-r--r--Documentation/crypto/async-tx-api.rst30
-rw-r--r--Documentation/crypto/crypto_engine.rst6
-rw-r--r--Documentation/crypto/index.rst2
-rw-r--r--Documentation/crypto/krb5.rst262
-rw-r--r--Documentation/crypto/sha3.rst130
-rw-r--r--Documentation/crypto/userspace-if.rst7
-rw-r--r--Documentation/dev-tools/autofdo.rst168
-rw-r--r--Documentation/dev-tools/checkpatch.rst70
-rw-r--r--Documentation/dev-tools/clang-format.rst (renamed from Documentation/process/clang-format.rst)0
-rw-r--r--Documentation/dev-tools/coccinelle.rst22
-rw-r--r--Documentation/dev-tools/gcov.rst13
-rw-r--r--Documentation/dev-tools/gpio-sloppy-logic-analyzer.rst93
-rw-r--r--Documentation/dev-tools/index.rst10
-rw-r--r--Documentation/dev-tools/kasan.rst65
-rw-r--r--Documentation/dev-tools/kcov.rst7
-rw-r--r--Documentation/dev-tools/kcsan.rst15
-rw-r--r--Documentation/dev-tools/kfence.rst7
-rw-r--r--Documentation/dev-tools/kmemleak.rst1
-rw-r--r--Documentation/dev-tools/kmsan.rst13
-rw-r--r--Documentation/dev-tools/kselftest.rst36
-rw-r--r--Documentation/dev-tools/ktap.rst5
-rw-r--r--Documentation/dev-tools/kunit/api/clk.rst10
-rw-r--r--Documentation/dev-tools/kunit/api/index.rst21
-rw-r--r--Documentation/dev-tools/kunit/api/of.rst13
-rw-r--r--Documentation/dev-tools/kunit/api/platformdevice.rst10
-rw-r--r--Documentation/dev-tools/kunit/run_manual.rst6
-rw-r--r--Documentation/dev-tools/kunit/run_wrapper.rst2
-rw-r--r--Documentation/dev-tools/kunit/style.rst29
-rw-r--r--Documentation/dev-tools/kunit/usage.rst387
-rw-r--r--Documentation/dev-tools/lkmm/docs/access-marking.rst11
-rw-r--r--Documentation/dev-tools/lkmm/docs/cheatsheet.rst11
-rw-r--r--Documentation/dev-tools/lkmm/docs/control-dependencies.rst11
-rw-r--r--Documentation/dev-tools/lkmm/docs/explanation.rst11
-rw-r--r--Documentation/dev-tools/lkmm/docs/glossary.rst11
-rw-r--r--Documentation/dev-tools/lkmm/docs/herd-representation.rst11
-rw-r--r--Documentation/dev-tools/lkmm/docs/index.rst21
-rw-r--r--Documentation/dev-tools/lkmm/docs/litmus-tests.rst11
-rw-r--r--Documentation/dev-tools/lkmm/docs/locking.rst11
-rw-r--r--Documentation/dev-tools/lkmm/docs/ordering.rst11
-rw-r--r--Documentation/dev-tools/lkmm/docs/readme.rst11
-rw-r--r--Documentation/dev-tools/lkmm/docs/recipes.rst11
-rw-r--r--Documentation/dev-tools/lkmm/docs/references.rst11
-rw-r--r--Documentation/dev-tools/lkmm/docs/simple.rst11
-rw-r--r--Documentation/dev-tools/lkmm/index.rst15
-rw-r--r--Documentation/dev-tools/lkmm/readme.rst11
-rw-r--r--Documentation/dev-tools/propeller.rst162
-rw-r--r--Documentation/dev-tools/testing-devices.rst47
-rw-r--r--Documentation/dev-tools/testing-overview.rst2
-rw-r--r--Documentation/dev-tools/ubsan.rst28
-rw-r--r--Documentation/devicetree/bindings/.yamllint4
-rw-r--r--Documentation/devicetree/bindings/Makefile44
-rw-r--r--Documentation/devicetree/bindings/access-controllers/access-controllers.yaml84
-rw-r--r--Documentation/devicetree/bindings/arc/archs-pct.txt17
-rw-r--r--Documentation/devicetree/bindings/arc/snps,archs-pct.yaml33
-rw-r--r--Documentation/devicetree/bindings/arm/airoha,en7581-chip-scu.yaml42
-rw-r--r--Documentation/devicetree/bindings/arm/airoha.yaml4
-rw-r--r--Documentation/devicetree/bindings/arm/altera.yaml25
-rw-r--r--Documentation/devicetree/bindings/arm/altera/socfpga-clk-manager.yaml102
-rw-r--r--Documentation/devicetree/bindings/arm/altera/socfpga-sdram-controller.txt12
-rw-r--r--Documentation/devicetree/bindings/arm/altera/socfpga-sdram-edac.txt15
-rw-r--r--Documentation/devicetree/bindings/arm/altera/socfpga-system.txt25
-rw-r--r--Documentation/devicetree/bindings/arm/amd,seattle.yaml24
-rw-r--r--Documentation/devicetree/bindings/arm/amlogic.yaml66
-rw-r--r--Documentation/devicetree/bindings/arm/amlogic/amlogic,meson-gx-ao-secure.yaml19
-rw-r--r--Documentation/devicetree/bindings/arm/amlogic/analog-top.txt20
-rw-r--r--Documentation/devicetree/bindings/arm/amlogic/assist.txt17
-rw-r--r--Documentation/devicetree/bindings/arm/amlogic/bootrom.txt17
-rw-r--r--Documentation/devicetree/bindings/arm/amlogic/pmu.txt18
-rw-r--r--Documentation/devicetree/bindings/arm/apm.yaml28
-rw-r--r--Documentation/devicetree/bindings/arm/apm/scu.txt17
-rw-r--r--Documentation/devicetree/bindings/arm/apple.yaml242
-rw-r--r--Documentation/devicetree/bindings/arm/apple/apple,pmgr.yaml28
-rw-r--r--Documentation/devicetree/bindings/arm/arm,coresight-cti.yaml4
-rw-r--r--Documentation/devicetree/bindings/arm/arm,coresight-dummy-sink.yaml6
-rw-r--r--Documentation/devicetree/bindings/arm/arm,coresight-dummy-source.yaml14
-rw-r--r--Documentation/devicetree/bindings/arm/arm,coresight-dynamic-funnel.yaml4
-rw-r--r--Documentation/devicetree/bindings/arm/arm,coresight-dynamic-replicator.yaml4
-rw-r--r--Documentation/devicetree/bindings/arm/arm,coresight-etb10.yaml4
-rw-r--r--Documentation/devicetree/bindings/arm/arm,coresight-etm.yaml4
-rw-r--r--Documentation/devicetree/bindings/arm/arm,coresight-static-funnel.yaml4
-rw-r--r--Documentation/devicetree/bindings/arm/arm,coresight-static-replicator.yaml36
-rw-r--r--Documentation/devicetree/bindings/arm/arm,coresight-tmc.yaml30
-rw-r--r--Documentation/devicetree/bindings/arm/arm,coresight-tpiu.yaml4
-rw-r--r--Documentation/devicetree/bindings/arm/arm,corstone1000.yaml4
-rw-r--r--Documentation/devicetree/bindings/arm/arm,embedded-trace-extension.yaml6
-rw-r--r--Documentation/devicetree/bindings/arm/arm,juno-fpga-apb-regs.yaml61
-rw-r--r--Documentation/devicetree/bindings/arm/arm,morello.yaml35
-rw-r--r--Documentation/devicetree/bindings/arm/arm,realview.yaml6
-rw-r--r--Documentation/devicetree/bindings/arm/arm,trace-buffer-extension.yaml10
-rw-r--r--Documentation/devicetree/bindings/arm/arm,vexpress-juno.yaml7
-rw-r--r--Documentation/devicetree/bindings/arm/aspeed/aspeed.yaml20
-rw-r--r--Documentation/devicetree/bindings/arm/atmel,sama5d2-secumod.yaml49
-rw-r--r--Documentation/devicetree/bindings/arm/atmel-at91.yaml52
-rw-r--r--Documentation/devicetree/bindings/arm/atmel-sysregs.txt73
-rw-r--r--Documentation/devicetree/bindings/arm/axiado.yaml23
-rw-r--r--Documentation/devicetree/bindings/arm/axis.txt29
-rw-r--r--Documentation/devicetree/bindings/arm/axis.yaml36
-rw-r--r--Documentation/devicetree/bindings/arm/bcm/bcm2835.yaml7
-rw-r--r--Documentation/devicetree/bindings/arm/bcm/brcm,bcm4708.yaml3
-rw-r--r--Documentation/devicetree/bindings/arm/bcm/brcm,bcmbca.yaml2
-rw-r--r--Documentation/devicetree/bindings/arm/bcm/raspberrypi,bcm2835-firmware.yaml58
-rw-r--r--Documentation/devicetree/bindings/arm/blaize.yaml40
-rw-r--r--Documentation/devicetree/bindings/arm/bst.yaml31
-rw-r--r--Documentation/devicetree/bindings/arm/cavium,thunder-88xx.yaml19
-rw-r--r--Documentation/devicetree/bindings/arm/cavium-thunder.txt10
-rw-r--r--Documentation/devicetree/bindings/arm/cavium-thunder2.txt8
-rw-r--r--Documentation/devicetree/bindings/arm/cirrus/cirrus,ep9301.yaml38
-rw-r--r--Documentation/devicetree/bindings/arm/cix.yaml26
-rw-r--r--Documentation/devicetree/bindings/arm/cpu-enable-method/al,alpine-smp10
-rw-r--r--Documentation/devicetree/bindings/arm/cpus.yaml295
-rw-r--r--Documentation/devicetree/bindings/arm/freescale/fsl,imx51-m4if.yaml41
-rw-r--r--Documentation/devicetree/bindings/arm/freescale/fsl,imx7ulp-pm.yaml8
-rw-r--r--Documentation/devicetree/bindings/arm/freescale/fsl,imx7ulp-sim.yaml4
-rw-r--r--Documentation/devicetree/bindings/arm/freescale/fsl,vf610-mscm-cpucfg.txt14
-rw-r--r--Documentation/devicetree/bindings/arm/freescale/fsl,vf610-mscm-ir.txt30
-rw-r--r--Documentation/devicetree/bindings/arm/freescale/m4if.txt12
-rw-r--r--Documentation/devicetree/bindings/arm/freescale/tigerp.txt12
-rw-r--r--Documentation/devicetree/bindings/arm/fsl.yaml359
-rw-r--r--Documentation/devicetree/bindings/arm/google.yaml3
-rw-r--r--Documentation/devicetree/bindings/arm/intel,socfpga.yaml8
-rw-r--r--Documentation/devicetree/bindings/arm/intel-ixp4xx.yaml2
-rw-r--r--Documentation/devicetree/bindings/arm/keystone/keystone.txt42
-rw-r--r--Documentation/devicetree/bindings/arm/keystone/ti,sci.yaml7
-rw-r--r--Documentation/devicetree/bindings/arm/lge.yaml28
-rw-r--r--Documentation/devicetree/bindings/arm/marvell,berlin.yaml45
-rw-r--r--Documentation/devicetree/bindings/arm/marvell/98dx3236.txt23
-rw-r--r--Documentation/devicetree/bindings/arm/marvell/ap80x-system-controller.txt185
-rw-r--r--Documentation/devicetree/bindings/arm/marvell/armada-370-xp.txt24
-rw-r--r--Documentation/devicetree/bindings/arm/marvell/armada-375.txt9
-rw-r--r--Documentation/devicetree/bindings/arm/marvell/armada-37xx.txt32
-rw-r--r--Documentation/devicetree/bindings/arm/marvell/armada-37xx.yaml2
-rw-r--r--Documentation/devicetree/bindings/arm/marvell/armada-38x.txt27
-rw-r--r--Documentation/devicetree/bindings/arm/marvell/armada-38x.yaml70
-rw-r--r--Documentation/devicetree/bindings/arm/marvell/armada-39x.txt31
-rw-r--r--Documentation/devicetree/bindings/arm/marvell/armada-7k-8k.yaml43
-rw-r--r--Documentation/devicetree/bindings/arm/marvell/armada-8kp.txt15
-rw-r--r--Documentation/devicetree/bindings/arm/marvell/cp110-system-controller.txt234
-rw-r--r--Documentation/devicetree/bindings/arm/marvell/kirkwood.txt27
-rw-r--r--Documentation/devicetree/bindings/arm/marvell/marvell,armada-370-xp.yaml78
-rw-r--r--Documentation/devicetree/bindings/arm/marvell/marvell,armada375.yaml21
-rw-r--r--Documentation/devicetree/bindings/arm/marvell/marvell,armada390.yaml32
-rw-r--r--Documentation/devicetree/bindings/arm/marvell/marvell,dove.txt22
-rw-r--r--Documentation/devicetree/bindings/arm/marvell/marvell,dove.yaml35
-rw-r--r--Documentation/devicetree/bindings/arm/marvell/marvell,kirkwood.txt105
-rw-r--r--Documentation/devicetree/bindings/arm/marvell/marvell,kirkwood.yaml266
-rw-r--r--Documentation/devicetree/bindings/arm/marvell/marvell,orion5x.txt25
-rw-r--r--Documentation/devicetree/bindings/arm/marvell/marvell,orion5x.yaml37
-rw-r--r--Documentation/devicetree/bindings/arm/mediatek.yaml297
-rw-r--r--Documentation/devicetree/bindings/arm/mediatek/mediatek,audsys.yaml16
-rw-r--r--Documentation/devicetree/bindings/arm/mediatek/mediatek,bdpsys.txt24
-rw-r--r--Documentation/devicetree/bindings/arm/mediatek/mediatek,camsys.txt24
-rw-r--r--Documentation/devicetree/bindings/arm/mediatek/mediatek,hifsys.txt26
-rw-r--r--Documentation/devicetree/bindings/arm/mediatek/mediatek,imgsys.txt30
-rw-r--r--Documentation/devicetree/bindings/arm/mediatek/mediatek,ipesys.txt22
-rw-r--r--Documentation/devicetree/bindings/arm/mediatek/mediatek,ipu.txt43
-rw-r--r--Documentation/devicetree/bindings/arm/mediatek/mediatek,jpgdecsys.txt22
-rw-r--r--Documentation/devicetree/bindings/arm/mediatek/mediatek,mcucfg.txt23
-rw-r--r--Documentation/devicetree/bindings/arm/mediatek/mediatek,mfgcfg.txt25
-rw-r--r--Documentation/devicetree/bindings/arm/mediatek/mediatek,mipi0a.txt28
-rw-r--r--Documentation/devicetree/bindings/arm/mediatek/mediatek,mmsys.yaml28
-rw-r--r--Documentation/devicetree/bindings/arm/mediatek/mediatek,pciesys.txt25
-rw-r--r--Documentation/devicetree/bindings/arm/mediatek/mediatek,ssusbsys.txt25
-rw-r--r--Documentation/devicetree/bindings/arm/mediatek/mediatek,vcodecsys.txt27
-rw-r--r--Documentation/devicetree/bindings/arm/mediatek/mediatek,vdecsys.txt29
-rw-r--r--Documentation/devicetree/bindings/arm/mediatek/mediatek,vencltsys.txt22
-rw-r--r--Documentation/devicetree/bindings/arm/mediatek/mediatek,vencsys.txt26
-rw-r--r--Documentation/devicetree/bindings/arm/mrvl/mrvl.yaml5
-rw-r--r--Documentation/devicetree/bindings/arm/msm/qcom,saw2.txt58
-rw-r--r--Documentation/devicetree/bindings/arm/nxp/lpc32xx.yaml2
-rw-r--r--Documentation/devicetree/bindings/arm/pmu.yaml15
-rw-r--r--Documentation/devicetree/bindings/arm/psci.yaml31
-rw-r--r--Documentation/devicetree/bindings/arm/qcom,coresight-ctcu.yaml88
-rw-r--r--Documentation/devicetree/bindings/arm/qcom,coresight-remote-etm.yaml4
-rw-r--r--Documentation/devicetree/bindings/arm/qcom,coresight-tnoc.yaml113
-rw-r--r--Documentation/devicetree/bindings/arm/qcom,coresight-tpda.yaml41
-rw-r--r--Documentation/devicetree/bindings/arm/qcom,coresight-tpdm.yaml70
-rw-r--r--Documentation/devicetree/bindings/arm/qcom-soc.yaml12
-rw-r--r--Documentation/devicetree/bindings/arm/qcom.yaml302
-rw-r--r--Documentation/devicetree/bindings/arm/rockchip.yaml466
-rw-r--r--Documentation/devicetree/bindings/arm/rockchip/pmu.yaml14
-rw-r--r--Documentation/devicetree/bindings/arm/rtsm-dcscb.txt19
-rw-r--r--Documentation/devicetree/bindings/arm/samsung/samsung-boards.yaml41
-rw-r--r--Documentation/devicetree/bindings/arm/spear-misc.txt9
-rw-r--r--Documentation/devicetree/bindings/arm/sti.yaml4
-rw-r--r--Documentation/devicetree/bindings/arm/stm32/st,mlahb.yaml23
-rw-r--r--Documentation/devicetree/bindings/arm/stm32/st,stm32-syscon.yaml33
-rw-r--r--Documentation/devicetree/bindings/arm/stm32/stm32.yaml53
-rw-r--r--Documentation/devicetree/bindings/arm/sunxi.yaml143
-rw-r--r--Documentation/devicetree/bindings/arm/syna.txt101
-rw-r--r--Documentation/devicetree/bindings/arm/tegra.yaml98
-rw-r--r--Documentation/devicetree/bindings/arm/tegra/nvidia,tegra186-pmc.yaml59
-rw-r--r--Documentation/devicetree/bindings/arm/ti/k3.yaml95
-rw-r--r--Documentation/devicetree/bindings/arm/ti/omap.yaml15
-rw-r--r--Documentation/devicetree/bindings/arm/ti/ti,keystone.yaml42
-rw-r--r--Documentation/devicetree/bindings/arm/vt8500.yaml10
-rw-r--r--Documentation/devicetree/bindings/ata/ahci-da850.txt18
-rw-r--r--Documentation/devicetree/bindings/ata/ahci-dm816.txt21
-rw-r--r--Documentation/devicetree/bindings/ata/ahci-fsl-qoriq.txt21
-rw-r--r--Documentation/devicetree/bindings/ata/ahci-mtk.txt51
-rw-r--r--Documentation/devicetree/bindings/ata/ahci-platform.yaml36
-rw-r--r--Documentation/devicetree/bindings/ata/ahci-st.txt35
-rw-r--r--Documentation/devicetree/bindings/ata/apm,xgene-ahci.yaml65
-rw-r--r--Documentation/devicetree/bindings/ata/apm-xgene.txt77
-rw-r--r--Documentation/devicetree/bindings/ata/arasan,cf-spear1340.yaml70
-rw-r--r--Documentation/devicetree/bindings/ata/atmel-at91_cf.txt19
-rw-r--r--Documentation/devicetree/bindings/ata/cavium,ebt3000-compact-flash.yaml59
-rw-r--r--Documentation/devicetree/bindings/ata/cavium-compact-flash.txt30
-rw-r--r--Documentation/devicetree/bindings/ata/ceva,ahci-1v84.yaml6
-rw-r--r--Documentation/devicetree/bindings/ata/cirrus,ep9312-pata.yaml42
-rw-r--r--Documentation/devicetree/bindings/ata/eswin,eic7700-ahci.yaml79
-rw-r--r--Documentation/devicetree/bindings/ata/fsl,ahci.yaml64
-rw-r--r--Documentation/devicetree/bindings/ata/fsl,imx-pata.yaml42
-rw-r--r--Documentation/devicetree/bindings/ata/fsl,pq-sata.yaml60
-rw-r--r--Documentation/devicetree/bindings/ata/fsl-sata.txt28
-rw-r--r--Documentation/devicetree/bindings/ata/imx-pata.txt16
-rw-r--r--Documentation/devicetree/bindings/ata/imx-sata.yaml50
-rw-r--r--Documentation/devicetree/bindings/ata/marvell,orion-sata.yaml83
-rw-r--r--Documentation/devicetree/bindings/ata/marvell.txt22
-rw-r--r--Documentation/devicetree/bindings/ata/mediatek,mtk-ahci.yaml98
-rw-r--r--Documentation/devicetree/bindings/ata/pata-arasan.txt37
-rw-r--r--Documentation/devicetree/bindings/ata/qcom-sata.txt48
-rw-r--r--Documentation/devicetree/bindings/ata/rockchip,dwc-ahci.yaml6
-rw-r--r--Documentation/devicetree/bindings/ata/sata_highbank.yaml2
-rw-r--r--Documentation/devicetree/bindings/ata/snps,dwc-ahci.yaml4
-rw-r--r--Documentation/devicetree/bindings/ata/st,ahci.yaml72
-rw-r--r--Documentation/devicetree/bindings/ata/ti,da850-ahci.yaml39
-rw-r--r--Documentation/devicetree/bindings/ata/ti,dm816-ahci.yaml43
-rw-r--r--Documentation/devicetree/bindings/auxdisplay/arm,versatile-lcd.yaml4
-rw-r--r--Documentation/devicetree/bindings/auxdisplay/gpio-7-segment.yaml55
-rw-r--r--Documentation/devicetree/bindings/auxdisplay/hit,hd44780.yaml68
-rw-r--r--Documentation/devicetree/bindings/auxdisplay/holtek,ht16k33.yaml54
-rw-r--r--Documentation/devicetree/bindings/auxdisplay/img,ascii-lcd.yaml4
-rw-r--r--Documentation/devicetree/bindings/auxdisplay/maxim,max6959.yaml44
-rw-r--r--Documentation/devicetree/bindings/board/fsl,bcsr.yaml32
-rw-r--r--Documentation/devicetree/bindings/board/fsl,fpga-qixis-i2c.yaml128
-rw-r--r--Documentation/devicetree/bindings/board/fsl,fpga-qixis.yaml91
-rw-r--r--Documentation/devicetree/bindings/board/fsl-board.txt81
-rw-r--r--Documentation/devicetree/bindings/bus/allwinner,sun50i-a64-de2.yaml2
-rw-r--r--Documentation/devicetree/bindings/bus/allwinner,sun8i-a23-rsb.yaml2
-rw-r--r--Documentation/devicetree/bindings/bus/brcm,gisb-arb.yaml1
-rw-r--r--Documentation/devicetree/bindings/bus/cznic,moxtet.yaml94
-rw-r--r--Documentation/devicetree/bindings/bus/fsl,imx8mp-aipstz.yaml104
-rw-r--r--Documentation/devicetree/bindings/bus/fsl,imx8qxp-pixel-link-msi-bus.yaml9
-rw-r--r--Documentation/devicetree/bindings/bus/imx-weim.txt117
-rw-r--r--Documentation/devicetree/bindings/bus/microsoft,vmbus.yaml16
-rw-r--r--Documentation/devicetree/bindings/bus/moxtet.txt46
-rw-r--r--Documentation/devicetree/bindings/bus/nvidia,tegra210-aconnect.yaml1
-rw-r--r--Documentation/devicetree/bindings/bus/qcom,ebi2.txt138
-rw-r--r--Documentation/devicetree/bindings/bus/renesas,bsc.yaml12
-rw-r--r--Documentation/devicetree/bindings/bus/st,stm32-etzpc.yaml96
-rw-r--r--Documentation/devicetree/bindings/bus/st,stm32mp25-rifsc.yaml109
-rw-r--r--Documentation/devicetree/bindings/cache/andestech,ax45mp-cache.yaml26
-rw-r--r--Documentation/devicetree/bindings/cache/l2c2x0.yaml5
-rw-r--r--Documentation/devicetree/bindings/cache/marvell,feroceon-cache.txt16
-rw-r--r--Documentation/devicetree/bindings/cache/marvell,kirkwood-cache.yaml45
-rw-r--r--Documentation/devicetree/bindings/cache/marvell,tauros2-cache.txt17
-rw-r--r--Documentation/devicetree/bindings/cache/marvell,tauros2-cache.yaml39
-rw-r--r--Documentation/devicetree/bindings/cache/qcom,llcc.yaml149
-rw-r--r--Documentation/devicetree/bindings/cache/sifive,ccache0.yaml49
-rw-r--r--Documentation/devicetree/bindings/cache/starfive,jh8100-starlink-cache.yaml66
-rw-r--r--Documentation/devicetree/bindings/chrome/google,cros-kbd-led-backlight.yaml36
-rw-r--r--Documentation/devicetree/bindings/clock/actions,owl-cmu.txt52
-rw-r--r--Documentation/devicetree/bindings/clock/actions,owl-cmu.yaml60
-rw-r--r--Documentation/devicetree/bindings/clock/adi,axi-clkgen.yaml26
-rw-r--r--Documentation/devicetree/bindings/clock/airoha,en7523-scu.yaml46
-rw-r--r--Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-gates-clk.yaml1
-rw-r--r--Documentation/devicetree/bindings/clock/allwinner,sun55i-a523-ccu.yaml136
-rw-r--r--Documentation/devicetree/bindings/clock/allwinner,sun8i-a83t-de2-clk.yaml1
-rw-r--r--Documentation/devicetree/bindings/clock/alphascale,acc.txt114
-rw-r--r--Documentation/devicetree/bindings/clock/alphascale,asm9260-clock-controller.yaml49
-rw-r--r--Documentation/devicetree/bindings/clock/altr_socfpga.txt30
-rw-r--r--Documentation/devicetree/bindings/clock/amlogic,a1-peripherals-clkc.yaml9
-rw-r--r--Documentation/devicetree/bindings/clock/amlogic,a1-pll-clkc.yaml9
-rw-r--r--Documentation/devicetree/bindings/clock/amlogic,axg-audio-clkc.txt59
-rw-r--r--Documentation/devicetree/bindings/clock/amlogic,axg-audio-clkc.yaml201
-rw-r--r--Documentation/devicetree/bindings/clock/amlogic,c3-peripherals-clkc.yaml120
-rw-r--r--Documentation/devicetree/bindings/clock/amlogic,c3-pll-clkc.yaml62
-rw-r--r--Documentation/devicetree/bindings/clock/amlogic,meson8-clkc.yaml45
-rw-r--r--Documentation/devicetree/bindings/clock/amlogic,meson8b-clkc.txt51
-rw-r--r--Documentation/devicetree/bindings/clock/apm,xgene-device-clock.yaml80
-rw-r--r--Documentation/devicetree/bindings/clock/apm,xgene-socpll-clock.yaml50
-rw-r--r--Documentation/devicetree/bindings/clock/apple,nco.yaml17
-rw-r--r--Documentation/devicetree/bindings/clock/armada3700-periph-clock.txt71
-rw-r--r--Documentation/devicetree/bindings/clock/armada3700-tbg-clock.txt27
-rw-r--r--Documentation/devicetree/bindings/clock/armada3700-xtal-clock.txt29
-rw-r--r--Documentation/devicetree/bindings/clock/artpec6.txt41
-rw-r--r--Documentation/devicetree/bindings/clock/atmel,at91rm9200-pmc.yaml8
-rw-r--r--Documentation/devicetree/bindings/clock/atmel,at91sam9x5-sckc.yaml5
-rw-r--r--Documentation/devicetree/bindings/clock/axis,artpec6-clkctrl.yaml55
-rw-r--r--Documentation/devicetree/bindings/clock/axis,artpec8-clock.yaml213
-rw-r--r--Documentation/devicetree/bindings/clock/baikal,bt1-ccu-div.yaml8
-rw-r--r--Documentation/devicetree/bindings/clock/brcm,bcm2835-aux-clock.txt31
-rw-r--r--Documentation/devicetree/bindings/clock/brcm,bcm2835-aux-clock.yaml47
-rw-r--r--Documentation/devicetree/bindings/clock/brcm,bcm2835-cprman.txt60
-rw-r--r--Documentation/devicetree/bindings/clock/brcm,bcm2835-cprman.yaml59
-rw-r--r--Documentation/devicetree/bindings/clock/brcm,bcm53573-ilp.txt36
-rw-r--r--Documentation/devicetree/bindings/clock/brcm,bcm53573-ilp.yaml46
-rw-r--r--Documentation/devicetree/bindings/clock/brcm,bcm63xx-clocks.txt24
-rw-r--r--Documentation/devicetree/bindings/clock/brcm,bcm63xx-clocks.yaml44
-rw-r--r--Documentation/devicetree/bindings/clock/cirrus,ep7209-clk.yaml47
-rw-r--r--Documentation/devicetree/bindings/clock/cirrus,lochnagar.yaml6
-rw-r--r--Documentation/devicetree/bindings/clock/clps711x-clock.txt19
-rw-r--r--Documentation/devicetree/bindings/clock/dove-divider-clock.txt28
-rw-r--r--Documentation/devicetree/bindings/clock/fixed-clock.yaml9
-rw-r--r--Documentation/devicetree/bindings/clock/fixed-factor-clock.yaml9
-rw-r--r--Documentation/devicetree/bindings/clock/fsl,imx8ulp-sim-lpav.yaml72
-rw-r--r--Documentation/devicetree/bindings/clock/fsl,qoriq-clock-legacy.yaml84
-rw-r--r--Documentation/devicetree/bindings/clock/fsl,qoriq-clock.yaml207
-rw-r--r--Documentation/devicetree/bindings/clock/fsl,vf610-ccm.yaml58
-rw-r--r--Documentation/devicetree/bindings/clock/fujitsu,mb86s70-crg11.txt26
-rw-r--r--Documentation/devicetree/bindings/clock/gated-fixed-clock.yaml49
-rw-r--r--Documentation/devicetree/bindings/clock/google,gs101-clock.yaml86
-rw-r--r--Documentation/devicetree/bindings/clock/idt,versaclock5.yaml2
-rw-r--r--Documentation/devicetree/bindings/clock/img,pistachio-clk.yaml136
-rw-r--r--Documentation/devicetree/bindings/clock/imx6q-clock.yaml3
-rw-r--r--Documentation/devicetree/bindings/clock/imx6sl-clock.yaml3
-rw-r--r--Documentation/devicetree/bindings/clock/imx6sll-clock.yaml3
-rw-r--r--Documentation/devicetree/bindings/clock/imx6sx-clock.yaml3
-rw-r--r--Documentation/devicetree/bindings/clock/imx6ul-clock.yaml3
-rw-r--r--Documentation/devicetree/bindings/clock/imx7d-clock.yaml1
-rw-r--r--Documentation/devicetree/bindings/clock/imx8m-clock.yaml11
-rw-r--r--Documentation/devicetree/bindings/clock/imx8mp-audiomix.yaml13
-rw-r--r--Documentation/devicetree/bindings/clock/imx93-clock.yaml1
-rw-r--r--Documentation/devicetree/bindings/clock/keystone-gate.txt2
-rw-r--r--Documentation/devicetree/bindings/clock/keystone-pll.txt2
-rw-r--r--Documentation/devicetree/bindings/clock/loongson,ls2k-clk.yaml22
-rw-r--r--Documentation/devicetree/bindings/clock/lpc1850-ccu.txt77
-rw-r--r--Documentation/devicetree/bindings/clock/lpc1850-cgu.txt131
-rw-r--r--Documentation/devicetree/bindings/clock/lpc1850-creg-clk.txt52
-rw-r--r--Documentation/devicetree/bindings/clock/lsi,axm5516-clks.txt29
-rw-r--r--Documentation/devicetree/bindings/clock/lsi,axm5516-clks.yaml43
-rw-r--r--Documentation/devicetree/bindings/clock/lsi,nspire-cx-clock.yaml33
-rw-r--r--Documentation/devicetree/bindings/clock/marvell,ap80x-clock.yaml54
-rw-r--r--Documentation/devicetree/bindings/clock/marvell,armada-370-corediv-clock.yaml52
-rw-r--r--Documentation/devicetree/bindings/clock/marvell,armada-3700-periph-clock.yaml96
-rw-r--r--Documentation/devicetree/bindings/clock/marvell,armada-3700-tbg-clock.yaml54
-rw-r--r--Documentation/devicetree/bindings/clock/marvell,armada-xp-cpu-clock.yaml44
-rw-r--r--Documentation/devicetree/bindings/clock/marvell,berlin.txt31
-rw-r--r--Documentation/devicetree/bindings/clock/marvell,berlin2-clk.yaml51
-rw-r--r--Documentation/devicetree/bindings/clock/marvell,cp110-clock.yaml70
-rw-r--r--Documentation/devicetree/bindings/clock/marvell,dove-divider-clock.yaml50
-rw-r--r--Documentation/devicetree/bindings/clock/marvell,mvebu-core-clock.yaml94
-rw-r--r--Documentation/devicetree/bindings/clock/marvell,pxa1908.yaml66
-rw-r--r--Documentation/devicetree/bindings/clock/marvell-armada-370-gating-clock.yaml227
-rw-r--r--Documentation/devicetree/bindings/clock/maxim,max77686.txt114
-rw-r--r--Documentation/devicetree/bindings/clock/maxim,max9485.txt59
-rw-r--r--Documentation/devicetree/bindings/clock/maxim,max9485.yaml82
-rw-r--r--Documentation/devicetree/bindings/clock/mediatek,apmixedsys.yaml6
-rw-r--r--Documentation/devicetree/bindings/clock/mediatek,infracfg.yaml (renamed from Documentation/devicetree/bindings/arm/mediatek/mediatek,infracfg.yaml)10
-rw-r--r--Documentation/devicetree/bindings/clock/mediatek,mt2701-hifsys.yaml50
-rw-r--r--Documentation/devicetree/bindings/clock/mediatek,mt6795-sys-clock.yaml54
-rw-r--r--Documentation/devicetree/bindings/clock/mediatek,mt7622-pciesys.yaml47
-rw-r--r--Documentation/devicetree/bindings/clock/mediatek,mt7622-ssusbsys.yaml45
-rw-r--r--Documentation/devicetree/bindings/clock/mediatek,mt8186-clock.yaml (renamed from Documentation/devicetree/bindings/arm/mediatek/mediatek,mt8186-clock.yaml)2
-rw-r--r--Documentation/devicetree/bindings/clock/mediatek,mt8186-sys-clock.yaml (renamed from Documentation/devicetree/bindings/arm/mediatek/mediatek,mt8186-sys-clock.yaml)2
-rw-r--r--Documentation/devicetree/bindings/clock/mediatek,mt8188-clock.yaml24
-rw-r--r--Documentation/devicetree/bindings/clock/mediatek,mt8188-sys-clock.yaml3
-rw-r--r--Documentation/devicetree/bindings/clock/mediatek,mt8192-clock.yaml (renamed from Documentation/devicetree/bindings/arm/mediatek/mediatek,mt8192-clock.yaml)2
-rw-r--r--Documentation/devicetree/bindings/clock/mediatek,mt8192-sys-clock.yaml (renamed from Documentation/devicetree/bindings/arm/mediatek/mediatek,mt8192-sys-clock.yaml)2
-rw-r--r--Documentation/devicetree/bindings/clock/mediatek,mt8195-clock.yaml (renamed from Documentation/devicetree/bindings/arm/mediatek/mediatek,mt8195-clock.yaml)2
-rw-r--r--Documentation/devicetree/bindings/clock/mediatek,mt8195-sys-clock.yaml (renamed from Documentation/devicetree/bindings/arm/mediatek/mediatek,mt8195-sys-clock.yaml)2
-rw-r--r--Documentation/devicetree/bindings/clock/mediatek,mt8196-clock.yaml112
-rw-r--r--Documentation/devicetree/bindings/clock/mediatek,mt8196-sys-clock.yaml107
-rw-r--r--Documentation/devicetree/bindings/clock/mediatek,mtmips-sysc.yaml39
-rw-r--r--Documentation/devicetree/bindings/clock/mediatek,pericfg.yaml (renamed from Documentation/devicetree/bindings/arm/mediatek/mediatek,pericfg.yaml)3
-rw-r--r--Documentation/devicetree/bindings/clock/mediatek,syscon.yaml112
-rw-r--r--Documentation/devicetree/bindings/clock/mediatek,topckgen.yaml4
-rw-r--r--Documentation/devicetree/bindings/clock/microchip,lan966x-gck.yaml13
-rw-r--r--Documentation/devicetree/bindings/clock/microchip,mpfs-clkcfg.yaml36
-rw-r--r--Documentation/devicetree/bindings/clock/microchip,pic32.txt39
-rw-r--r--Documentation/devicetree/bindings/clock/microchip,pic32mzda-clk.yaml45
-rw-r--r--Documentation/devicetree/bindings/clock/milbeaut-clock.yaml29
-rw-r--r--Documentation/devicetree/bindings/clock/moxa,moxart-clock.txt48
-rw-r--r--Documentation/devicetree/bindings/clock/moxa,moxart-clock.yaml38
-rw-r--r--Documentation/devicetree/bindings/clock/mvebu-core-clock.txt87
-rw-r--r--Documentation/devicetree/bindings/clock/mvebu-corediv-clock.txt23
-rw-r--r--Documentation/devicetree/bindings/clock/mvebu-cpu-clock.txt23
-rw-r--r--Documentation/devicetree/bindings/clock/mvebu-gated-clock.txt205
-rw-r--r--Documentation/devicetree/bindings/clock/nspire-clock.txt24
-rw-r--r--Documentation/devicetree/bindings/clock/nuvoton,npcm750-clk.txt100
-rw-r--r--Documentation/devicetree/bindings/clock/nuvoton,npcm750-clk.yaml66
-rw-r--r--Documentation/devicetree/bindings/clock/nvidia,tegra124-car.yaml8
-rw-r--r--Documentation/devicetree/bindings/clock/nvidia,tegra20-car.yaml6
-rw-r--r--Documentation/devicetree/bindings/clock/nxp,imx95-blk-ctl.yaml60
-rw-r--r--Documentation/devicetree/bindings/clock/nxp,imx95-display-master-csr.yaml64
-rw-r--r--Documentation/devicetree/bindings/clock/nxp,lpc1850-ccu.yaml104
-rw-r--r--Documentation/devicetree/bindings/clock/nxp,lpc1850-cgu.yaml99
-rw-r--r--Documentation/devicetree/bindings/clock/nxp,lpc3220-clk.txt30
-rw-r--r--Documentation/devicetree/bindings/clock/nxp,lpc3220-clk.yaml51
-rw-r--r--Documentation/devicetree/bindings/clock/nxp,lpc3220-usb-clk.txt22
-rw-r--r--Documentation/devicetree/bindings/clock/nxp,lpc3220-usb-clk.yaml35
-rw-r--r--Documentation/devicetree/bindings/clock/pistachio-clock.txt123
-rw-r--r--Documentation/devicetree/bindings/clock/qca,ath79-pll.txt33
-rw-r--r--Documentation/devicetree/bindings/clock/qca,ath79-pll.yaml70
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,a53pll.yaml4
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,camcc-sm8250.yaml2
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,dispcc-sc8280xp.yaml20
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,dispcc-sm6125.yaml2
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,dispcc-sm6350.yaml24
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,dispcc-sm8x50.yaml46
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-apq8064.yaml3
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-apq8084.yaml1
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-ipq4019.yaml5
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-ipq6018.yaml3
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-ipq8064.yaml3
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-ipq8074.yaml3
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-mdm9607.yaml43
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-mdm9615.yaml50
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-msm8660.yaml3
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-msm8909.yaml1
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-msm8916.yaml1
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-msm8953.yaml12
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-msm8974.yaml1
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-msm8976.yaml3
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-msm8994.yaml5
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-msm8996.yaml3
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-msm8998.yaml3
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-other.yaml43
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-qcm2290.yaml3
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-qcs404.yaml3
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-sc7180.yaml3
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-sc7280.yaml3
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-sc8180x.yaml10
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-sc8280xp.yaml3
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-sdm660.yaml1
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-sdm845.yaml3
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-sdx55.yaml3
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-sdx65.yaml3
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-sm6115.yaml3
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-sm6125.yaml5
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-sm6350.yaml5
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-sm8150.yaml3
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-sm8250.yaml3
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-sm8350.yaml3
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc-sm8450.yaml7
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc.yaml1
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,glymur-dispcc.yaml98
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,glymur-gcc.yaml121
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gpucc-sdm660.yaml20
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gpucc.yaml30
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,hfpll.txt63
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,hfpll.yaml69
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,ipq5018-gcc.yaml3
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,ipq5332-gcc.yaml46
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,ipq5424-apss-clk.yaml55
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,ipq9574-cmn-pll.yaml79
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,ipq9574-gcc.yaml6
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,ipq9574-nsscc.yaml145
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,krait-cc.txt34
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,krait-cc.yaml43
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,milos-camcc.yaml51
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,milos-dispcc.yaml63
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,milos-gcc.yaml62
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,milos-videocc.yaml53
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,mmcc.yaml28
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,msm8998-gpucc.yaml22
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,q6sstopcc.yaml2
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,qca8k-nsscc.yaml86
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,qcm2290-dispcc.yaml22
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,qcm2290-gpucc.yaml77
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,qcs404-turingcc.yaml47
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,qcs615-dispcc.yaml55
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,qcs615-gcc.yaml59
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,qcs615-gpucc.yaml49
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,qcs615-videocc.yaml47
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,qcs8300-gcc.yaml66
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,qdu1000-ecpricc.yaml2
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,qdu1000-gcc.yaml3
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,rpmcc.yaml10
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,rpmhcc.yaml6
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sa8775p-camcc.yaml81
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sa8775p-dispcc.yaml79
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sa8775p-gcc.yaml3
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sa8775p-videocc.yaml63
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sar2130p-gcc.yaml65
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sc7180-camcc.yaml2
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sc7180-dispcc.yaml22
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sc7180-lpasscorecc.yaml2
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sc7180-mss.yaml61
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sc7280-camcc.yaml2
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sc7280-dispcc.yaml22
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sc7280-lpasscc.yaml2
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sc7280-lpasscorecc.yaml32
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sc8180x-camcc.yaml67
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sc8280xp-lpasscc.yaml13
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sdm845-camcc.yaml8
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sdm845-dispcc.yaml22
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sdm845-lpasscc.yaml2
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sdx75-gcc.yaml3
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sm4450-camcc.yaml51
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sm4450-dispcc.yaml59
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sm4450-gcc.yaml3
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sm6115-dispcc.yaml22
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sm6115-gpucc.yaml4
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sm6115-lpasscc.yaml46
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sm6125-gpucc.yaml4
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sm6350-camcc.yaml13
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sm6375-dispcc.yaml4
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sm6375-gcc.yaml5
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sm6375-gpucc.yaml4
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sm7150-camcc.yaml60
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sm7150-dispcc.yaml75
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sm7150-gcc.yaml3
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sm7150-videocc.yaml58
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sm8150-camcc.yaml65
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sm8350-videocc.yaml2
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sm8450-camcc.yaml44
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sm8450-dispcc.yaml23
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sm8450-gpucc.yaml32
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sm8450-videocc.yaml62
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sm8550-dispcc.yaml32
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sm8550-gcc.yaml3
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sm8550-tcsr.yaml9
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sm8650-dispcc.yaml106
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sm8650-gcc.yaml3
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,sm8750-gcc.yaml66
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,turingcc.txt19
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,videocc.yaml73
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,x1e80100-camcc.yaml75
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,x1e80100-gcc.yaml71
-rw-r--r--Documentation/devicetree/bindings/clock/qoriq-clock.txt212
-rw-r--r--Documentation/devicetree/bindings/clock/raspberrypi,rp1-clocks.yaml58
-rw-r--r--Documentation/devicetree/bindings/clock/renesas,5p35023.yaml1
-rw-r--r--Documentation/devicetree/bindings/clock/renesas,cpg-clocks.yaml8
-rw-r--r--Documentation/devicetree/bindings/clock/renesas,cpg-mssr.yaml52
-rw-r--r--Documentation/devicetree/bindings/clock/renesas,r9a08g045-vbattb.yaml84
-rw-r--r--Documentation/devicetree/bindings/clock/renesas,rzg2l-cpg.yaml2
-rw-r--r--Documentation/devicetree/bindings/clock/renesas,rzv2h-cpg.yaml84
-rw-r--r--Documentation/devicetree/bindings/clock/riscv,rpmi-clock.yaml64
-rw-r--r--Documentation/devicetree/bindings/clock/riscv,rpmi-mpxy-clock.yaml64
-rw-r--r--Documentation/devicetree/bindings/clock/rockchip,rk3328-cru.txt58
-rw-r--r--Documentation/devicetree/bindings/clock/rockchip,rk3328-cru.yaml74
-rw-r--r--Documentation/devicetree/bindings/clock/rockchip,rk3506-cru.yaml55
-rw-r--r--Documentation/devicetree/bindings/clock/rockchip,rk3528-cru.yaml64
-rw-r--r--Documentation/devicetree/bindings/clock/rockchip,rk3562-cru.yaml55
-rw-r--r--Documentation/devicetree/bindings/clock/rockchip,rk3576-cru.yaml56
-rw-r--r--Documentation/devicetree/bindings/clock/rockchip,rk3588-cru.yaml4
-rw-r--r--Documentation/devicetree/bindings/clock/rockchip,rv1126b-cru.yaml52
-rw-r--r--Documentation/devicetree/bindings/clock/samsung,exynos2200-cmu.yaml247
-rw-r--r--Documentation/devicetree/bindings/clock/samsung,exynos7870-cmu.yaml227
-rw-r--r--Documentation/devicetree/bindings/clock/samsung,exynos850-clock.yaml42
-rw-r--r--Documentation/devicetree/bindings/clock/samsung,exynos8895-clock.yaml239
-rw-r--r--Documentation/devicetree/bindings/clock/samsung,exynos990-clock.yaml164
-rw-r--r--Documentation/devicetree/bindings/clock/samsung,exynosautov9-clock.yaml19
-rw-r--r--Documentation/devicetree/bindings/clock/samsung,exynosautov920-clock.yaml298
-rw-r--r--Documentation/devicetree/bindings/clock/samsung,s2mps11.yaml1
-rw-r--r--Documentation/devicetree/bindings/clock/samsung,s3c6400-clock.yaml57
-rw-r--r--Documentation/devicetree/bindings/clock/samsung,s3c64xx-clock.txt76
-rw-r--r--Documentation/devicetree/bindings/clock/silabs,si514.txt24
-rw-r--r--Documentation/devicetree/bindings/clock/silabs,si5341.txt175
-rw-r--r--Documentation/devicetree/bindings/clock/silabs,si5341.yaml223
-rw-r--r--Documentation/devicetree/bindings/clock/silabs,si544.txt25
-rw-r--r--Documentation/devicetree/bindings/clock/silabs,si544.yaml54
-rw-r--r--Documentation/devicetree/bindings/clock/silabs,si570.txt41
-rw-r--r--Documentation/devicetree/bindings/clock/silabs,si570.yaml80
-rw-r--r--Documentation/devicetree/bindings/clock/sophgo,cv1800-clk.yaml17
-rw-r--r--Documentation/devicetree/bindings/clock/sophgo,sg2042-clkgen.yaml61
-rw-r--r--Documentation/devicetree/bindings/clock/sophgo,sg2042-pll.yaml53
-rw-r--r--Documentation/devicetree/bindings/clock/sophgo,sg2042-rpgate.yaml49
-rw-r--r--Documentation/devicetree/bindings/clock/sophgo,sg2044-clk.yaml99
-rw-r--r--Documentation/devicetree/bindings/clock/spacemit,k1-pll.yaml50
-rw-r--r--Documentation/devicetree/bindings/clock/sprd,sc9860-clk.txt63
-rw-r--r--Documentation/devicetree/bindings/clock/sprd,sc9860-clk.yaml159
-rw-r--r--Documentation/devicetree/bindings/clock/st,stm32-rcc.txt138
-rw-r--r--Documentation/devicetree/bindings/clock/st,stm32-rcc.yaml144
-rw-r--r--Documentation/devicetree/bindings/clock/st,stm32h7-rcc.txt71
-rw-r--r--Documentation/devicetree/bindings/clock/st,stm32mp1-rcc.yaml16
-rw-r--r--Documentation/devicetree/bindings/clock/st,stm32mp21-rcc.yaml199
-rw-r--r--Documentation/devicetree/bindings/clock/st,stm32mp25-rcc.yaml177
-rw-r--r--Documentation/devicetree/bindings/clock/st/st,flexgen.txt3
-rw-r--r--Documentation/devicetree/bindings/clock/stericsson,abx500.txt20
-rw-r--r--Documentation/devicetree/bindings/clock/tesla,fsd-clock.yaml2
-rw-r--r--Documentation/devicetree/bindings/clock/thead,th1520-clk-ap.yaml64
-rw-r--r--Documentation/devicetree/bindings/clock/ti,clkctrl.yaml65
-rw-r--r--Documentation/devicetree/bindings/clock/ti,sci-clk.yaml2
-rw-r--r--Documentation/devicetree/bindings/clock/ti-clkctrl.txt63
-rw-r--r--Documentation/devicetree/bindings/clock/ti-keystone-pllctrl.txt20
-rw-r--r--Documentation/devicetree/bindings/clock/ti/adpll.txt2
-rw-r--r--Documentation/devicetree/bindings/clock/ti/apll.txt2
-rw-r--r--Documentation/devicetree/bindings/clock/ti/autoidle.txt39
-rw-r--r--Documentation/devicetree/bindings/clock/ti/clockdomain.txt2
-rw-r--r--Documentation/devicetree/bindings/clock/ti/composite.txt57
-rw-r--r--Documentation/devicetree/bindings/clock/ti/divider.txt117
-rw-r--r--Documentation/devicetree/bindings/clock/ti/dpll.txt2
-rw-r--r--Documentation/devicetree/bindings/clock/ti/fapll.txt2
-rw-r--r--Documentation/devicetree/bindings/clock/ti/fixed-factor-clock.txt44
-rw-r--r--Documentation/devicetree/bindings/clock/ti/gate.txt107
-rw-r--r--Documentation/devicetree/bindings/clock/ti/interface.txt57
-rw-r--r--Documentation/devicetree/bindings/clock/ti/mux.txt80
-rw-r--r--Documentation/devicetree/bindings/clock/ti/ti,autoidle.yaml34
-rw-r--r--Documentation/devicetree/bindings/clock/ti/ti,composite-clock.yaml82
-rw-r--r--Documentation/devicetree/bindings/clock/ti/ti,divider-clock.yaml179
-rw-r--r--Documentation/devicetree/bindings/clock/ti/ti,fixed-factor-clock.yaml76
-rw-r--r--Documentation/devicetree/bindings/clock/ti/ti,gate-clock.yaml125
-rw-r--r--Documentation/devicetree/bindings/clock/ti/ti,interface-clock.yaml71
-rw-r--r--Documentation/devicetree/bindings/clock/ti/ti,mux-clock.yaml125
-rw-r--r--Documentation/devicetree/bindings/clock/vf610-clock.txt41
-rw-r--r--Documentation/devicetree/bindings/clock/xgene.txt131
-rw-r--r--Documentation/devicetree/bindings/clock/xlnx,clocking-wizard.yaml7
-rw-r--r--Documentation/devicetree/bindings/clock/xlnx,vcu.yaml59
-rw-r--r--Documentation/devicetree/bindings/connector/gocontroll,moduline-module-slot.yaml88
-rw-r--r--Documentation/devicetree/bindings/connector/usb-connector.yaml53
-rw-r--r--Documentation/devicetree/bindings/counter/fsl,ftm-quaddec.yaml36
-rw-r--r--Documentation/devicetree/bindings/counter/ftm-quaddec.txt18
-rw-r--r--Documentation/devicetree/bindings/counter/ti-eqep.yaml27
-rw-r--r--Documentation/devicetree/bindings/cpu/cpu-topology.txt553
-rw-r--r--Documentation/devicetree/bindings/cpu/idle-states.yaml2
-rw-r--r--Documentation/devicetree/bindings/cpu/nvidia,tegra186-ccplex-cluster.yaml37
-rw-r--r--Documentation/devicetree/bindings/cpufreq/airoha,en7581-cpufreq.yaml55
-rw-r--r--Documentation/devicetree/bindings/cpufreq/apple,cluster-cpufreq.yaml13
-rw-r--r--Documentation/devicetree/bindings/cpufreq/cpufreq-dt.txt61
-rw-r--r--Documentation/devicetree/bindings/cpufreq/cpufreq-mediatek.txt250
-rw-r--r--Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml45
-rw-r--r--Documentation/devicetree/bindings/cpufreq/mediatek,mt8196-cpufreq-hw.yaml82
-rw-r--r--Documentation/devicetree/bindings/cpufreq/qemu,virtual-cpufreq.yaml48
-rw-r--r--Documentation/devicetree/bindings/crypto/allwinner,sun8i-ce.yaml2
-rw-r--r--Documentation/devicetree/bindings/crypto/amd,ccp-seattle-v1a.yaml41
-rw-r--r--Documentation/devicetree/bindings/crypto/amd-ccp.txt17
-rw-r--r--Documentation/devicetree/bindings/crypto/artpec6-crypto.txt16
-rw-r--r--Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-aes.yaml8
-rw-r--r--Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-sha.yaml8
-rw-r--r--Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-tdes.yaml8
-rw-r--r--Documentation/devicetree/bindings/crypto/axis,artpec6-crypto.yaml39
-rw-r--r--Documentation/devicetree/bindings/crypto/brcm,spu-crypto.txt22
-rw-r--r--Documentation/devicetree/bindings/crypto/brcm,spum-crypto.yaml44
-rw-r--r--Documentation/devicetree/bindings/crypto/fsl,sec-v4.0-mon.yaml5
-rw-r--r--Documentation/devicetree/bindings/crypto/fsl,sec-v4.0.yaml66
-rw-r--r--Documentation/devicetree/bindings/crypto/fsl,sec2.0.yaml144
-rw-r--r--Documentation/devicetree/bindings/crypto/fsl-sec2.txt65
-rw-r--r--Documentation/devicetree/bindings/crypto/fsl-sec6.txt157
-rw-r--r--Documentation/devicetree/bindings/crypto/hisilicon,hip06-sec.yaml134
-rw-r--r--Documentation/devicetree/bindings/crypto/hisilicon,hip07-sec.txt67
-rw-r--r--Documentation/devicetree/bindings/crypto/img,hash-accelerator.yaml69
-rw-r--r--Documentation/devicetree/bindings/crypto/img-hash.txt27
-rw-r--r--Documentation/devicetree/bindings/crypto/inside-secure,safexcel-eip93.yaml67
-rw-r--r--Documentation/devicetree/bindings/crypto/inside-secure,safexcel.yaml2
-rw-r--r--Documentation/devicetree/bindings/crypto/marvell,orion-crypto.yaml133
-rw-r--r--Documentation/devicetree/bindings/crypto/marvell-cesa.txt44
-rw-r--r--Documentation/devicetree/bindings/crypto/mediatek-crypto.txt25
-rw-r--r--Documentation/devicetree/bindings/crypto/mv_cesa.txt32
-rw-r--r--Documentation/devicetree/bindings/crypto/nvidia,tegra234-se-aes.yaml52
-rw-r--r--Documentation/devicetree/bindings/crypto/nvidia,tegra234-se-hash.yaml52
-rw-r--r--Documentation/devicetree/bindings/crypto/omap-aes.txt31
-rw-r--r--Documentation/devicetree/bindings/crypto/omap-des.txt30
-rw-r--r--Documentation/devicetree/bindings/crypto/omap-sham.txt28
-rw-r--r--Documentation/devicetree/bindings/crypto/qcom,inline-crypto-engine.yaml5
-rw-r--r--Documentation/devicetree/bindings/crypto/qcom,prng.yaml8
-rw-r--r--Documentation/devicetree/bindings/crypto/qcom-qce.yaml10
-rw-r--r--Documentation/devicetree/bindings/crypto/st,stm32-cryp.yaml4
-rw-r--r--Documentation/devicetree/bindings/crypto/st,stm32-hash.yaml4
-rw-r--r--Documentation/devicetree/bindings/crypto/starfive,jh7110-crypto.yaml30
-rw-r--r--Documentation/devicetree/bindings/crypto/ti,am62l-dthev2.yaml50
-rw-r--r--Documentation/devicetree/bindings/crypto/ti,omap-sham.yaml56
-rw-r--r--Documentation/devicetree/bindings/crypto/ti,omap2-aes.yaml58
-rw-r--r--Documentation/devicetree/bindings/crypto/ti,omap4-des.yaml65
-rw-r--r--Documentation/devicetree/bindings/crypto/xlnx,versal-trng.yaml35
-rw-r--r--Documentation/devicetree/bindings/devfreq/nvidia,tegra30-actmon.yaml13
-rw-r--r--Documentation/devicetree/bindings/display/allwinner,sun4i-a10-display-frontend.yaml1
-rw-r--r--Documentation/devicetree/bindings/display/allwinner,sun6i-a31-drc.yaml1
-rw-r--r--Documentation/devicetree/bindings/display/allwinner,sun8i-a83t-de2-mixer.yaml34
-rw-r--r--Documentation/devicetree/bindings/display/allwinner,sun8i-a83t-dw-hdmi.yaml1
-rw-r--r--Documentation/devicetree/bindings/display/amlogic,meson-dw-hdmi.yaml13
-rw-r--r--Documentation/devicetree/bindings/display/amlogic,meson-vpu.yaml1
-rw-r--r--Documentation/devicetree/bindings/display/apple,h7-display-pipe-mipi.yaml83
-rw-r--r--Documentation/devicetree/bindings/display/apple,h7-display-pipe.yaml88
-rw-r--r--Documentation/devicetree/bindings/display/arm,pl11x.yaml3
-rw-r--r--Documentation/devicetree/bindings/display/atmel,lcdc-display.yaml103
-rw-r--r--Documentation/devicetree/bindings/display/atmel,lcdc.txt87
-rw-r--r--Documentation/devicetree/bindings/display/atmel,lcdc.yaml70
-rw-r--r--Documentation/devicetree/bindings/display/atmel/atmel,hlcdc-display-controller.yaml63
-rw-r--r--Documentation/devicetree/bindings/display/atmel/hlcdc-dc.txt75
-rw-r--r--Documentation/devicetree/bindings/display/brcm,bcm2711-hdmi.yaml78
-rw-r--r--Documentation/devicetree/bindings/display/brcm,bcm2835-hvs.yaml85
-rw-r--r--Documentation/devicetree/bindings/display/brcm,bcm2835-pixelvalve0.yaml3
-rw-r--r--Documentation/devicetree/bindings/display/brcm,bcm2835-txp.yaml5
-rw-r--r--Documentation/devicetree/bindings/display/brcm,bcm2835-vc4.yaml1
-rw-r--r--Documentation/devicetree/bindings/display/bridge/adi,adv7511.yaml1
-rw-r--r--Documentation/devicetree/bindings/display/bridge/adi,adv7533.yaml2
-rw-r--r--Documentation/devicetree/bindings/display/bridge/fsl,imx8mp-hdmi-tx.yaml114
-rw-r--r--Documentation/devicetree/bindings/display/bridge/ingenic,jz4780-hdmi.yaml3
-rw-r--r--Documentation/devicetree/bindings/display/bridge/ite,it6263.yaml251
-rw-r--r--Documentation/devicetree/bindings/display/bridge/ite,it6505.yaml8
-rw-r--r--Documentation/devicetree/bindings/display/bridge/ite,it66121.yaml6
-rw-r--r--Documentation/devicetree/bindings/display/bridge/lontium,lt9611.yaml5
-rw-r--r--Documentation/devicetree/bindings/display/bridge/lvds-codec.yaml3
-rw-r--r--Documentation/devicetree/bindings/display/bridge/megachips,stdp2690-ge-b850v3-fw.yaml111
-rw-r--r--Documentation/devicetree/bindings/display/bridge/megachips-stdpxxxx-ge-b850v3-fw.txt91
-rw-r--r--Documentation/devicetree/bindings/display/bridge/microchip,sam9x75-lvds.yaml55
-rw-r--r--Documentation/devicetree/bindings/display/bridge/nwl-dsi.yaml18
-rw-r--r--Documentation/devicetree/bindings/display/bridge/nxp,tda998x.yaml12
-rw-r--r--Documentation/devicetree/bindings/display/bridge/parade,ps8622.yaml2
-rw-r--r--Documentation/devicetree/bindings/display/bridge/renesas,dsi-csi2-tx.yaml55
-rw-r--r--Documentation/devicetree/bindings/display/bridge/renesas,dsi.yaml67
-rw-r--r--Documentation/devicetree/bindings/display/bridge/samsung,mipi-dsim.yaml103
-rw-r--r--Documentation/devicetree/bindings/display/bridge/sil,sii8620.yaml2
-rw-r--r--Documentation/devicetree/bindings/display/bridge/sil,sii9022.yaml20
-rw-r--r--Documentation/devicetree/bindings/display/bridge/simple-bridge.yaml4
-rw-r--r--Documentation/devicetree/bindings/display/bridge/solomon,ssd2825.yaml141
-rw-r--r--Documentation/devicetree/bindings/display/bridge/synopsys,dw-hdmi.yaml13
-rw-r--r--Documentation/devicetree/bindings/display/bridge/ti,sn65dsi83.yaml33
-rw-r--r--Documentation/devicetree/bindings/display/bridge/ti,sn65dsi86.yaml2
-rw-r--r--Documentation/devicetree/bindings/display/bridge/ti,tdp158.yaml58
-rw-r--r--Documentation/devicetree/bindings/display/bridge/toshiba,tc358767.yaml24
-rw-r--r--Documentation/devicetree/bindings/display/bridge/toshiba,tc358768.yaml4
-rw-r--r--Documentation/devicetree/bindings/display/bridge/toshiba,tc358775.yaml39
-rw-r--r--Documentation/devicetree/bindings/display/bridge/waveshare,dsi2dpi.yaml103
-rw-r--r--Documentation/devicetree/bindings/display/connector/dp-connector.yaml52
-rw-r--r--Documentation/devicetree/bindings/display/dsi-controller.yaml2
-rw-r--r--Documentation/devicetree/bindings/display/elgin,jg10309-01.yaml54
-rw-r--r--Documentation/devicetree/bindings/display/exynos/exynos_dp.txt112
-rw-r--r--Documentation/devicetree/bindings/display/fsl,dcu.txt34
-rw-r--r--Documentation/devicetree/bindings/display/fsl,lcdif.yaml47
-rw-r--r--Documentation/devicetree/bindings/display/fsl,ls1021a-dcu.yaml71
-rw-r--r--Documentation/devicetree/bindings/display/fsl,tcon.txt17
-rw-r--r--Documentation/devicetree/bindings/display/fsl,vf610-tcon.yaml43
-rw-r--r--Documentation/devicetree/bindings/display/himax,hx8357.yaml78
-rw-r--r--Documentation/devicetree/bindings/display/himax,hx8357d.txt26
-rw-r--r--Documentation/devicetree/bindings/display/ilitek,ili9486.yaml1
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl,imx-display-subsystem.yaml36
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl,imx-parallel-display.yaml74
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl,imx6-hdmi.yaml8
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl,imx6q-ipu.yaml97
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl,imx6q-ldb.yaml193
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl,imx6qp-pre.yaml55
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl,imx6qp-prg.yaml54
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl,imx8mp-hdmi-pai.yaml69
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl,imx8mp-hdmi-pvi.yaml84
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-axi-performance-counter.yaml57
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-blit-engine.yaml204
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-blitblend.yaml41
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-clut.yaml44
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-command-sequencer.yaml67
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-constframe.yaml44
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-display-engine.yaml152
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-dither.yaml45
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-extdst.yaml72
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-fetchunit.yaml141
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-filter.yaml43
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-framegen.yaml64
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-gammacor.yaml32
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-layerblend.yaml39
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-matrix.yaml44
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-pixel-engine.yaml250
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-rop.yaml43
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-safety.yaml34
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-scaling-engine.yaml83
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-signature.yaml53
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-store.yaml96
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc-tcon.yaml45
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl,imx8qxp-dc.yaml236
-rw-r--r--Documentation/devicetree/bindings/display/imx/fsl-imx-drm.txt162
-rw-r--r--Documentation/devicetree/bindings/display/imx/ldb.txt147
-rw-r--r--Documentation/devicetree/bindings/display/lvds-data-mapping.yaml31
-rw-r--r--Documentation/devicetree/bindings/display/lvds-dual-ports.yaml63
-rw-r--r--Documentation/devicetree/bindings/display/lvds.yaml2
-rw-r--r--Documentation/devicetree/bindings/display/mayqueen,pixpaper.yaml63
-rw-r--r--Documentation/devicetree/bindings/display/mediatek/mediatek,aal.yaml59
-rw-r--r--Documentation/devicetree/bindings/display/mediatek/mediatek,ccorr.yaml24
-rw-r--r--Documentation/devicetree/bindings/display/mediatek/mediatek,color.yaml27
-rw-r--r--Documentation/devicetree/bindings/display/mediatek/mediatek,dither.yaml23
-rw-r--r--Documentation/devicetree/bindings/display/mediatek/mediatek,dp.yaml22
-rw-r--r--Documentation/devicetree/bindings/display/mediatek/mediatek,dpi.yaml56
-rw-r--r--Documentation/devicetree/bindings/display/mediatek/mediatek,dsc.yaml27
-rw-r--r--Documentation/devicetree/bindings/display/mediatek/mediatek,dsi.yaml28
-rw-r--r--Documentation/devicetree/bindings/display/mediatek/mediatek,ethdr.yaml22
-rw-r--r--Documentation/devicetree/bindings/display/mediatek/mediatek,gamma.yaml25
-rw-r--r--Documentation/devicetree/bindings/display/mediatek/mediatek,merge.yaml27
-rw-r--r--Documentation/devicetree/bindings/display/mediatek/mediatek,mt8195-hdmi-ddc.yaml41
-rw-r--r--Documentation/devicetree/bindings/display/mediatek/mediatek,mt8195-hdmi.yaml151
-rw-r--r--Documentation/devicetree/bindings/display/mediatek/mediatek,od.yaml36
-rw-r--r--Documentation/devicetree/bindings/display/mediatek/mediatek,ovl-2l.yaml22
-rw-r--r--Documentation/devicetree/bindings/display/mediatek/mediatek,ovl.yaml35
-rw-r--r--Documentation/devicetree/bindings/display/mediatek/mediatek,padding.yaml10
-rw-r--r--Documentation/devicetree/bindings/display/mediatek/mediatek,postmask.yaml21
-rw-r--r--Documentation/devicetree/bindings/display/mediatek/mediatek,rdma.yaml23
-rw-r--r--Documentation/devicetree/bindings/display/mediatek/mediatek,split.yaml19
-rw-r--r--Documentation/devicetree/bindings/display/mediatek/mediatek,ufoe.yaml36
-rw-r--r--Documentation/devicetree/bindings/display/msm/dp-controller.yaml169
-rw-r--r--Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml247
-rw-r--r--Documentation/devicetree/bindings/display/msm/dsi-phy-10nm.yaml48
-rw-r--r--Documentation/devicetree/bindings/display/msm/dsi-phy-14nm.yaml35
-rw-r--r--Documentation/devicetree/bindings/display/msm/dsi-phy-20nm.yaml36
-rw-r--r--Documentation/devicetree/bindings/display/msm/dsi-phy-28nm.yaml35
-rw-r--r--Documentation/devicetree/bindings/display/msm/dsi-phy-7nm.yaml37
-rw-r--r--Documentation/devicetree/bindings/display/msm/dsi-phy-common.yaml2
-rw-r--r--Documentation/devicetree/bindings/display/msm/gmu.yaml101
-rw-r--r--Documentation/devicetree/bindings/display/msm/gpu.yaml253
-rw-r--r--Documentation/devicetree/bindings/display/msm/hdmi.yaml47
-rw-r--r--Documentation/devicetree/bindings/display/msm/mdp4.yaml9
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,glymur-mdss.yaml264
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,mdp5.yaml2
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,mdss.yaml14
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,qcs8300-mdss.yaml286
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,sa8775p-mdss.yaml464
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,sar2130p-mdss.yaml443
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,sc7280-dpu.yaml11
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,sc7280-mdss.yaml3
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,sc8180x-dpu.yaml103
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,sc8180x-mdss.yaml359
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,sc8280xp-dpu.yaml122
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,sm6150-dpu.yaml108
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,sm6150-mdss.yaml255
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,sm6350-mdss.yaml9
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,sm6375-mdss.yaml2
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,sm7150-dpu.yaml143
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,sm7150-mdss.yaml464
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,sm8150-dpu.yaml4
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,sm8150-mdss.yaml9
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,sm8250-dpu.yaml99
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,sm8350-dpu.yaml120
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,sm8350-mdss.yaml13
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,sm8450-dpu.yaml139
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,sm8550-dpu.yaml133
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,sm8550-mdss.yaml14
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,sm8650-dpu.yaml12
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,sm8650-mdss.yaml17
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,sm8750-mdss.yaml474
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,x1e80100-mdss.yaml255
-rw-r--r--Documentation/devicetree/bindings/display/panel/abt,y030xx067a.yaml4
-rw-r--r--Documentation/devicetree/bindings/display/panel/advantech,idk-2121wr.yaml14
-rw-r--r--Documentation/devicetree/bindings/display/panel/anbernic,rg35xx-plus-panel.yaml67
-rw-r--r--Documentation/devicetree/bindings/display/panel/apple,summit.yaml58
-rw-r--r--Documentation/devicetree/bindings/display/panel/asus,z00t-tm5p5-nt35596.yaml7
-rw-r--r--Documentation/devicetree/bindings/display/panel/boe,bf060y8m-aj0.yaml4
-rw-r--r--Documentation/devicetree/bindings/display/panel/boe,himax8279d.yaml4
-rw-r--r--Documentation/devicetree/bindings/display/panel/boe,td4320.yaml65
-rw-r--r--Documentation/devicetree/bindings/display/panel/boe,th101mb31ig002-28a.yaml75
-rw-r--r--Documentation/devicetree/bindings/display/panel/boe,tv101wum-ll2.yaml63
-rw-r--r--Documentation/devicetree/bindings/display/panel/boe,tv101wum-nl6.yaml4
-rw-r--r--Documentation/devicetree/bindings/display/panel/elida,kd35t133.yaml5
-rw-r--r--Documentation/devicetree/bindings/display/panel/fascontek,fs035vg158.yaml3
-rw-r--r--Documentation/devicetree/bindings/display/panel/feixin,k101-im2ba02.yaml5
-rw-r--r--Documentation/devicetree/bindings/display/panel/himax,hx8279.yaml75
-rw-r--r--Documentation/devicetree/bindings/display/panel/himax,hx83102.yaml83
-rw-r--r--Documentation/devicetree/bindings/display/panel/himax,hx83112a.yaml76
-rw-r--r--Documentation/devicetree/bindings/display/panel/himax,hx83112b.yaml73
-rw-r--r--Documentation/devicetree/bindings/display/panel/himax,hx8394.yaml37
-rw-r--r--Documentation/devicetree/bindings/display/panel/hydis,hv101hd1.yaml60
-rw-r--r--Documentation/devicetree/bindings/display/panel/ilitek,il79900a.yaml68
-rw-r--r--Documentation/devicetree/bindings/display/panel/ilitek,ili9163.yaml4
-rw-r--r--Documentation/devicetree/bindings/display/panel/ilitek,ili9322.yaml3
-rw-r--r--Documentation/devicetree/bindings/display/panel/ilitek,ili9341.yaml3
-rw-r--r--Documentation/devicetree/bindings/display/panel/ilitek,ili9805.yaml4
-rw-r--r--Documentation/devicetree/bindings/display/panel/ilitek,ili9806e.yaml64
-rw-r--r--Documentation/devicetree/bindings/display/panel/ilitek,ili9881c.yaml10
-rw-r--r--Documentation/devicetree/bindings/display/panel/innolux,ej030na.yaml4
-rw-r--r--Documentation/devicetree/bindings/display/panel/innolux,p097pfg.yaml4
-rw-r--r--Documentation/devicetree/bindings/display/panel/jadard,jd9365da-h3.yaml5
-rw-r--r--Documentation/devicetree/bindings/display/panel/jdi,lpm102a188a.yaml4
-rw-r--r--Documentation/devicetree/bindings/display/panel/jdi,lt070me05000.yaml4
-rw-r--r--Documentation/devicetree/bindings/display/panel/kingdisplay,kd035g6-54nt.yaml4
-rw-r--r--Documentation/devicetree/bindings/display/panel/leadtek,ltk035c5444t.yaml3
-rw-r--r--Documentation/devicetree/bindings/display/panel/leadtek,ltk050h3146w.yaml6
-rw-r--r--Documentation/devicetree/bindings/display/panel/leadtek,ltk500hd1829.yaml10
-rw-r--r--Documentation/devicetree/bindings/display/panel/lg,ld070wx3-sl01.yaml60
-rw-r--r--Documentation/devicetree/bindings/display/panel/lg,lg4573.yaml3
-rw-r--r--Documentation/devicetree/bindings/display/panel/lg,sw43408.yaml64
-rw-r--r--Documentation/devicetree/bindings/display/panel/lgphilips,lb035q02.yaml3
-rw-r--r--Documentation/devicetree/bindings/display/panel/mitsubishi,aa104xd12.yaml6
-rw-r--r--Documentation/devicetree/bindings/display/panel/nec,nl8048hl11.yaml4
-rw-r--r--Documentation/devicetree/bindings/display/panel/newvision,nv3051d.yaml4
-rw-r--r--Documentation/devicetree/bindings/display/panel/novatek,nt35510.yaml10
-rw-r--r--Documentation/devicetree/bindings/display/panel/novatek,nt35950.yaml7
-rw-r--r--Documentation/devicetree/bindings/display/panel/novatek,nt36523.yaml29
-rw-r--r--Documentation/devicetree/bindings/display/panel/novatek,nt36672a.yaml4
-rw-r--r--Documentation/devicetree/bindings/display/panel/novatek,nt36672e.yaml66
-rw-r--r--Documentation/devicetree/bindings/display/panel/novatek,nt37801.yaml69
-rw-r--r--Documentation/devicetree/bindings/display/panel/olimex,lcd-olinuxino.yaml4
-rw-r--r--Documentation/devicetree/bindings/display/panel/panel-common-dual.yaml47
-rw-r--r--Documentation/devicetree/bindings/display/panel/panel-common.yaml8
-rw-r--r--Documentation/devicetree/bindings/display/panel/panel-edp-legacy.yaml117
-rw-r--r--Documentation/devicetree/bindings/display/panel/panel-lvds.yaml14
-rw-r--r--Documentation/devicetree/bindings/display/panel/panel-mipi-dbi-spi.yaml33
-rw-r--r--Documentation/devicetree/bindings/display/panel/panel-simple-dsi.yaml36
-rw-r--r--Documentation/devicetree/bindings/display/panel/panel-simple-lvds-dual-ports.yaml34
-rw-r--r--Documentation/devicetree/bindings/display/panel/panel-simple.yaml121
-rw-r--r--Documentation/devicetree/bindings/display/panel/panel-timing.yaml1
-rw-r--r--Documentation/devicetree/bindings/display/panel/powertip,hx8238a.yaml29
-rw-r--r--Documentation/devicetree/bindings/display/panel/powertip,st7272.yaml29
-rw-r--r--Documentation/devicetree/bindings/display/panel/raydium,rm67191.yaml4
-rw-r--r--Documentation/devicetree/bindings/display/panel/raydium,rm67200.yaml71
-rw-r--r--Documentation/devicetree/bindings/display/panel/raydium,rm692e5.yaml4
-rw-r--r--Documentation/devicetree/bindings/display/panel/raydium,rm69380.yaml90
-rw-r--r--Documentation/devicetree/bindings/display/panel/renesas,r61307.yaml94
-rw-r--r--Documentation/devicetree/bindings/display/panel/renesas,r69328.yaml73
-rw-r--r--Documentation/devicetree/bindings/display/panel/rocktech,jh057n00900.yaml5
-rw-r--r--Documentation/devicetree/bindings/display/panel/ronbo,rb070d30.yaml16
-rw-r--r--Documentation/devicetree/bindings/display/panel/samsung,amoled-mipi-dsi.yaml4
-rw-r--r--Documentation/devicetree/bindings/display/panel/samsung,ams495qa01.yaml5
-rw-r--r--Documentation/devicetree/bindings/display/panel/samsung,ams581vf01.yaml79
-rw-r--r--Documentation/devicetree/bindings/display/panel/samsung,ams639rq08.yaml80
-rw-r--r--Documentation/devicetree/bindings/display/panel/samsung,atna33xc20.yaml116
-rw-r--r--Documentation/devicetree/bindings/display/panel/samsung,ld9040.yaml4
-rw-r--r--Documentation/devicetree/bindings/display/panel/samsung,lms380kf01.yaml3
-rw-r--r--Documentation/devicetree/bindings/display/panel/samsung,lms397kf04.yaml3
-rw-r--r--Documentation/devicetree/bindings/display/panel/samsung,s6d16d0.yaml4
-rw-r--r--Documentation/devicetree/bindings/display/panel/samsung,s6d27a1.yaml3
-rw-r--r--Documentation/devicetree/bindings/display/panel/samsung,s6d7aa0.yaml3
-rw-r--r--Documentation/devicetree/bindings/display/panel/samsung,s6e3fc2x01.yaml81
-rw-r--r--Documentation/devicetree/bindings/display/panel/samsung,s6e3ha8.yaml75
-rw-r--r--Documentation/devicetree/bindings/display/panel/samsung,s6e63m0.yaml4
-rw-r--r--Documentation/devicetree/bindings/display/panel/samsung,s6e88a0-ams427ap24.yaml65
-rw-r--r--Documentation/devicetree/bindings/display/panel/samsung,s6e88a0-ams452ef01.yaml5
-rw-r--r--Documentation/devicetree/bindings/display/panel/samsung,s6e8aa0.yaml14
-rw-r--r--Documentation/devicetree/bindings/display/panel/samsung,s6e8aa5x01-ams561ra01.yaml55
-rw-r--r--Documentation/devicetree/bindings/display/panel/samsung,sofef00.yaml79
-rw-r--r--Documentation/devicetree/bindings/display/panel/sharp,ld-d5116z01b.yaml30
-rw-r--r--Documentation/devicetree/bindings/display/panel/sharp,lq079l1sx01.yaml99
-rw-r--r--Documentation/devicetree/bindings/display/panel/sharp,lq101r1sx01.yaml4
-rw-r--r--Documentation/devicetree/bindings/display/panel/sharp,ls043t1le01.yaml4
-rw-r--r--Documentation/devicetree/bindings/display/panel/sharp,ls060t1sx01.yaml4
-rw-r--r--Documentation/devicetree/bindings/display/panel/sitronix,st7701.yaml70
-rw-r--r--Documentation/devicetree/bindings/display/panel/sitronix,st7789v.yaml4
-rw-r--r--Documentation/devicetree/bindings/display/panel/sony,acx424akp.yaml5
-rw-r--r--Documentation/devicetree/bindings/display/panel/sony,acx565akm.yaml3
-rw-r--r--Documentation/devicetree/bindings/display/panel/sony,td4353-jdi.yaml7
-rw-r--r--Documentation/devicetree/bindings/display/panel/sony,tulip-truly-nt35521.yaml3
-rw-r--r--Documentation/devicetree/bindings/display/panel/synaptics,r63353.yaml6
-rw-r--r--Documentation/devicetree/bindings/display/panel/synaptics,td4300-panel.yaml89
-rw-r--r--Documentation/devicetree/bindings/display/panel/tpo,td.yaml4
-rw-r--r--Documentation/devicetree/bindings/display/panel/tpo,tpg110.yaml4
-rw-r--r--Documentation/devicetree/bindings/display/panel/truly,nt35597-2K-display.yaml97
-rw-r--r--Documentation/devicetree/bindings/display/panel/visionox,g2647fb105.yaml79
-rw-r--r--Documentation/devicetree/bindings/display/panel/visionox,r66451.yaml2
-rw-r--r--Documentation/devicetree/bindings/display/panel/visionox,rm69299.yaml10
-rw-r--r--Documentation/devicetree/bindings/display/panel/visionox,rm692e5.yaml77
-rw-r--r--Documentation/devicetree/bindings/display/panel/xinpeng,xpp055c272.yaml5
-rw-r--r--Documentation/devicetree/bindings/display/renesas,cmm.yaml12
-rw-r--r--Documentation/devicetree/bindings/display/renesas,du.yaml101
-rw-r--r--Documentation/devicetree/bindings/display/renesas,rzg2l-du.yaml176
-rw-r--r--Documentation/devicetree/bindings/display/rockchip/cdn-dp-rockchip.txt74
-rw-r--r--Documentation/devicetree/bindings/display/rockchip/rockchip,analogix-dp.yaml25
-rw-r--r--Documentation/devicetree/bindings/display/rockchip/rockchip,dw-dp.yaml149
-rw-r--r--Documentation/devicetree/bindings/display/rockchip/rockchip,dw-hdmi.yaml46
-rw-r--r--Documentation/devicetree/bindings/display/rockchip/rockchip,dw-mipi-dsi.yaml12
-rw-r--r--Documentation/devicetree/bindings/display/rockchip/rockchip,inno-hdmi.yaml25
-rw-r--r--Documentation/devicetree/bindings/display/rockchip/rockchip,rk3066-hdmi.yaml7
-rw-r--r--Documentation/devicetree/bindings/display/rockchip/rockchip,rk3399-cdn-dp.yaml170
-rw-r--r--Documentation/devicetree/bindings/display/rockchip/rockchip,rk3588-dw-hdmi-qp.yaml200
-rw-r--r--Documentation/devicetree/bindings/display/rockchip/rockchip,rk3588-mipi-dsi2.yaml121
-rw-r--r--Documentation/devicetree/bindings/display/rockchip/rockchip-vop.yaml6
-rw-r--r--Documentation/devicetree/bindings/display/rockchip/rockchip-vop2.yaml143
-rw-r--r--Documentation/devicetree/bindings/display/samsung/samsung,exynos5-dp.yaml163
-rw-r--r--Documentation/devicetree/bindings/display/samsung/samsung,exynos7-decon.yaml25
-rw-r--r--Documentation/devicetree/bindings/display/samsung/samsung,fimd.yaml1
-rw-r--r--Documentation/devicetree/bindings/display/sharp,ls010b7dh04.yaml92
-rw-r--r--Documentation/devicetree/bindings/display/simple-framebuffer.yaml1
-rw-r--r--Documentation/devicetree/bindings/display/sitronix,st7567.yaml68
-rw-r--r--Documentation/devicetree/bindings/display/sitronix,st7571.yaml78
-rw-r--r--Documentation/devicetree/bindings/display/sitronix,st7586.txt22
-rw-r--r--Documentation/devicetree/bindings/display/sitronix,st7586.yaml61
-rw-r--r--Documentation/devicetree/bindings/display/solomon,ssd1307fb.yaml20
-rw-r--r--Documentation/devicetree/bindings/display/solomon,ssd132x.yaml12
-rw-r--r--Documentation/devicetree/bindings/display/solomon,ssd133x.yaml45
-rw-r--r--Documentation/devicetree/bindings/display/sprd/sprd,sharkl3-dpu.yaml2
-rw-r--r--Documentation/devicetree/bindings/display/sprd/sprd,sharkl3-dsi-host.yaml2
-rw-r--r--Documentation/devicetree/bindings/display/st,stm32-ltdc.yaml55
-rw-r--r--Documentation/devicetree/bindings/display/st,stm32mp25-lvds.yaml130
-rw-r--r--Documentation/devicetree/bindings/display/tegra/nvidia,tegra114-mipi.yaml1
-rw-r--r--Documentation/devicetree/bindings/display/tegra/nvidia,tegra114-tsec.yaml68
-rw-r--r--Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-csi.yaml138
-rw-r--r--Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-epp.yaml14
-rw-r--r--Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-host1x.yaml11
-rw-r--r--Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-isp.yaml15
-rw-r--r--Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-mpe.yaml18
-rw-r--r--Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-vi.yaml3
-rw-r--r--Documentation/devicetree/bindings/display/tegra/nvidia,tegra210-csi.yaml3
-rw-r--r--Documentation/devicetree/bindings/display/ti/ti,am625-oldi.yaml79
-rw-r--r--Documentation/devicetree/bindings/display/ti/ti,am65x-dss.yaml223
-rw-r--r--Documentation/devicetree/bindings/display/ti/ti,opa362.txt38
-rw-r--r--Documentation/devicetree/bindings/display/truly,nt35597.txt59
-rw-r--r--Documentation/devicetree/bindings/display/xlnx/xlnx,zynqmp-dpsub.yaml10
-rw-r--r--Documentation/devicetree/bindings/dma/adi,axi-dmac.txt61
-rw-r--r--Documentation/devicetree/bindings/dma/adi,axi-dmac.yaml129
-rw-r--r--Documentation/devicetree/bindings/dma/allwinner,sun4i-a10-dma.yaml4
-rw-r--r--Documentation/devicetree/bindings/dma/allwinner,sun50i-a64-dma.yaml15
-rw-r--r--Documentation/devicetree/bindings/dma/apm,xgene-storm-dma.yaml59
-rw-r--r--Documentation/devicetree/bindings/dma/apm-xgene-dma.txt47
-rw-r--r--Documentation/devicetree/bindings/dma/apple,admac.yaml17
-rw-r--r--Documentation/devicetree/bindings/dma/arm,dma-350.yaml44
-rw-r--r--Documentation/devicetree/bindings/dma/atmel,at91sam9g45-dma.yaml68
-rw-r--r--Documentation/devicetree/bindings/dma/atmel,sama5d4-dma.yaml82
-rw-r--r--Documentation/devicetree/bindings/dma/atmel-dma.txt42
-rw-r--r--Documentation/devicetree/bindings/dma/atmel-xdma.txt54
-rw-r--r--Documentation/devicetree/bindings/dma/brcm,iproc-sba.txt29
-rw-r--r--Documentation/devicetree/bindings/dma/brcm,iproc-sba.yaml41
-rw-r--r--Documentation/devicetree/bindings/dma/cirrus,ep9301-dma-m2m.yaml84
-rw-r--r--Documentation/devicetree/bindings/dma/cirrus,ep9301-dma-m2p.yaml144
-rw-r--r--Documentation/devicetree/bindings/dma/dma-common.yaml7
-rw-r--r--Documentation/devicetree/bindings/dma/fsl,edma.yaml191
-rw-r--r--Documentation/devicetree/bindings/dma/fsl,elo-dma.yaml137
-rw-r--r--Documentation/devicetree/bindings/dma/fsl,elo3-dma.yaml125
-rw-r--r--Documentation/devicetree/bindings/dma/fsl,eloplus-dma.yaml132
-rw-r--r--Documentation/devicetree/bindings/dma/fsl,imx-dma.yaml70
-rw-r--r--Documentation/devicetree/bindings/dma/fsl,imx-sdma.yaml4
-rw-r--r--Documentation/devicetree/bindings/dma/fsl,mxs-dma.yaml54
-rw-r--r--Documentation/devicetree/bindings/dma/fsl-imx-dma.txt50
-rw-r--r--Documentation/devicetree/bindings/dma/fsl-qdma.txt58
-rw-r--r--Documentation/devicetree/bindings/dma/fsl-qdma.yaml135
-rw-r--r--Documentation/devicetree/bindings/dma/loongson,ls1b-apbdma.yaml65
-rw-r--r--Documentation/devicetree/bindings/dma/lpc1850-dmamux.txt54
-rw-r--r--Documentation/devicetree/bindings/dma/marvell,mmp-dma.yaml72
-rw-r--r--Documentation/devicetree/bindings/dma/marvell,orion-xor.yaml84
-rw-r--r--Documentation/devicetree/bindings/dma/marvell,xor-v2.yaml61
-rw-r--r--Documentation/devicetree/bindings/dma/mediatek,mt7622-hsdma.yaml63
-rw-r--r--Documentation/devicetree/bindings/dma/mmp-dma.txt81
-rw-r--r--Documentation/devicetree/bindings/dma/mtk-hsdma.txt33
-rw-r--r--Documentation/devicetree/bindings/dma/mv-xor-v2.txt28
-rw-r--r--Documentation/devicetree/bindings/dma/mv-xor.txt40
-rw-r--r--Documentation/devicetree/bindings/dma/nvidia,tegra186-gpc-dma.yaml1
-rw-r--r--Documentation/devicetree/bindings/dma/nvidia,tegra20-apbdma.txt44
-rw-r--r--Documentation/devicetree/bindings/dma/nvidia,tegra20-apbdma.yaml100
-rw-r--r--Documentation/devicetree/bindings/dma/nvidia,tegra210-adma.yaml62
-rw-r--r--Documentation/devicetree/bindings/dma/nxp,lpc3220-dmamux.yaml49
-rw-r--r--Documentation/devicetree/bindings/dma/qcom,bam-dma.yaml6
-rw-r--r--Documentation/devicetree/bindings/dma/qcom,gpi.yaml8
-rw-r--r--Documentation/devicetree/bindings/dma/qcom_hidma_mgmt.txt95
-rw-r--r--Documentation/devicetree/bindings/dma/renesas,rcar-dmac.yaml1
-rw-r--r--Documentation/devicetree/bindings/dma/renesas,rz-dmac.yaml126
-rw-r--r--Documentation/devicetree/bindings/dma/sifive,fu540-c000-pdma.yaml15
-rw-r--r--Documentation/devicetree/bindings/dma/snps,dma-spear1340.yaml43
-rw-r--r--Documentation/devicetree/bindings/dma/snps,dw-axi-dmac.yaml6
-rw-r--r--Documentation/devicetree/bindings/dma/sophgo,cv1800b-dmamux.yaml51
-rw-r--r--Documentation/devicetree/bindings/dma/spacemit,k1-pdma.yaml68
-rw-r--r--Documentation/devicetree/bindings/dma/sprd,sc9860-dma.yaml92
-rw-r--r--Documentation/devicetree/bindings/dma/sprd-dma.txt44
-rw-r--r--Documentation/devicetree/bindings/dma/st,stm32-dmamux.yaml53
-rw-r--r--Documentation/devicetree/bindings/dma/stericsson,dma40.yaml1
-rw-r--r--Documentation/devicetree/bindings/dma/stm32/st,stm32-dma.yaml (renamed from Documentation/devicetree/bindings/dma/st,stm32-dma.yaml)9
-rw-r--r--Documentation/devicetree/bindings/dma/stm32/st,stm32-dma3.yaml141
-rw-r--r--Documentation/devicetree/bindings/dma/stm32/st,stm32-dmamux.yaml67
-rw-r--r--Documentation/devicetree/bindings/dma/stm32/st,stm32-mdma.yaml (renamed from Documentation/devicetree/bindings/dma/st,stm32-mdma.yaml)4
-rw-r--r--Documentation/devicetree/bindings/dma/ti-dma-crossbar.txt2
-rw-r--r--Documentation/devicetree/bindings/dma/ti/k3-bcdma.yaml5
-rw-r--r--Documentation/devicetree/bindings/dma/xilinx/xilinx_dma.txt23
-rw-r--r--Documentation/devicetree/bindings/dma/xilinx/xlnx,zynqmp-dma-1.0.yaml10
-rw-r--r--Documentation/devicetree/bindings/dpll/dpll-device.yaml76
-rw-r--r--Documentation/devicetree/bindings/dpll/dpll-pin.yaml45
-rw-r--r--Documentation/devicetree/bindings/dpll/microchip,zl30731.yaml115
-rw-r--r--Documentation/devicetree/bindings/dsp/fsl,dsp.yaml58
-rw-r--r--Documentation/devicetree/bindings/dsp/mediatek,mt8195-dsp.yaml42
-rw-r--r--Documentation/devicetree/bindings/dts-coding-style.rst31
-rw-r--r--Documentation/devicetree/bindings/edac/altr,socfpga-ecc-manager.yaml324
-rw-r--r--Documentation/devicetree/bindings/edac/apm,xgene-edac.yaml202
-rw-r--r--Documentation/devicetree/bindings/edac/apm-xgene-edac.txt112
-rw-r--r--Documentation/devicetree/bindings/edac/aspeed,ast2400-sdram-edac.yaml48
-rw-r--r--Documentation/devicetree/bindings/edac/aspeed-sdram-edac.txt28
-rw-r--r--Documentation/devicetree/bindings/edac/socfpga-eccmgr.txt383
-rw-r--r--Documentation/devicetree/bindings/eeprom/at24.yaml36
-rw-r--r--Documentation/devicetree/bindings/eeprom/at25.yaml10
-rw-r--r--Documentation/devicetree/bindings/eeprom/st,m24lr.yaml52
-rw-r--r--Documentation/devicetree/bindings/embedded-controller/acer,aspire1-ec.yaml60
-rw-r--r--Documentation/devicetree/bindings/embedded-controller/google,cros-ec.yaml (renamed from Documentation/devicetree/bindings/mfd/google,cros-ec.yaml)12
-rw-r--r--Documentation/devicetree/bindings/embedded-controller/gw,gsc.yaml193
-rw-r--r--Documentation/devicetree/bindings/embedded-controller/huawei,gaokun3-ec.yaml124
-rw-r--r--Documentation/devicetree/bindings/embedded-controller/kontron,sl28cpld.yaml (renamed from Documentation/devicetree/bindings/mfd/kontron,sl28cpld.yaml)19
-rw-r--r--Documentation/devicetree/bindings/embedded-controller/lenovo,thinkpad-t14s-ec.yaml50
-rw-r--r--Documentation/devicetree/bindings/embedded-controller/lenovo,yoga-c630-ec.yaml83
-rw-r--r--Documentation/devicetree/bindings/embedded-controller/microsoft,surface-sam.yaml47
-rw-r--r--Documentation/devicetree/bindings/embedded-controller/traverse,ten64-controller.yaml40
-rw-r--r--Documentation/devicetree/bindings/example-schema.yaml18
-rw-r--r--Documentation/devicetree/bindings/extcon/extcon-ptn5150.yaml11
-rw-r--r--Documentation/devicetree/bindings/extcon/extcon-rt8973a.txt23
-rw-r--r--Documentation/devicetree/bindings/extcon/extcon-usb-gpio.txt21
-rw-r--r--Documentation/devicetree/bindings/extcon/linux,extcon-usb-gpio.yaml43
-rw-r--r--Documentation/devicetree/bindings/extcon/maxim,max14526.yaml80
-rw-r--r--Documentation/devicetree/bindings/extcon/richtek,rt8973a-muic.yaml49
-rw-r--r--Documentation/devicetree/bindings/firmware/arm,scmi.yaml105
-rw-r--r--Documentation/devicetree/bindings/firmware/cznic,turris-omnia-mcu.yaml86
-rw-r--r--Documentation/devicetree/bindings/firmware/fsl,scu.yaml12
-rw-r--r--Documentation/devicetree/bindings/firmware/google,gs101-acpm-ipc.yaml96
-rw-r--r--Documentation/devicetree/bindings/firmware/intel,ixp4xx-network-processing-engine.yaml52
-rw-r--r--Documentation/devicetree/bindings/firmware/intel,stratix10-svc.txt57
-rw-r--r--Documentation/devicetree/bindings/firmware/intel,stratix10-svc.yaml108
-rw-r--r--Documentation/devicetree/bindings/firmware/nvidia,tegra186-bpmp.yaml1
-rw-r--r--Documentation/devicetree/bindings/firmware/nxp,imx95-scmi-pinctrl.yaml53
-rw-r--r--Documentation/devicetree/bindings/firmware/nxp,imx95-scmi.yaml72
-rw-r--r--Documentation/devicetree/bindings/firmware/qcom,scm.yaml31
-rw-r--r--Documentation/devicetree/bindings/firmware/qemu,fw-cfg-mmio.yaml1
-rw-r--r--Documentation/devicetree/bindings/firmware/thead,th1520-aon.yaml60
-rw-r--r--Documentation/devicetree/bindings/firmware/xilinx/xlnx,zynqmp-firmware.yaml96
-rw-r--r--Documentation/devicetree/bindings/fpga/altera-passive-serial.txt29
-rw-r--r--Documentation/devicetree/bindings/fpga/altr,fpga-passive-serial.yaml74
-rw-r--r--Documentation/devicetree/bindings/fpga/fpga-region.txt479
-rw-r--r--Documentation/devicetree/bindings/fpga/fpga-region.yaml354
-rw-r--r--Documentation/devicetree/bindings/fpga/intel,stratix10-soc-fpga-mgr.yaml36
-rw-r--r--Documentation/devicetree/bindings/fpga/intel-stratix10-soc-fpga-mgr.txt18
-rw-r--r--Documentation/devicetree/bindings/fpga/lattice,ice40-fpga-mgr.yaml59
-rw-r--r--Documentation/devicetree/bindings/fpga/lattice-ice40-fpga-mgr.txt21
-rw-r--r--Documentation/devicetree/bindings/fpga/xlnx,fpga-selectmap.yaml86
-rw-r--r--Documentation/devicetree/bindings/fpga/xlnx,versal-fpga.yaml4
-rw-r--r--Documentation/devicetree/bindings/fsi/aspeed,ast2400-cf-fsi-master.yaml81
-rw-r--r--Documentation/devicetree/bindings/fsi/aspeed,ast2600-fsi-master.yaml121
-rw-r--r--Documentation/devicetree/bindings/fsi/fsi-controller.yaml66
-rw-r--r--Documentation/devicetree/bindings/fsi/fsi-master-aspeed.txt36
-rw-r--r--Documentation/devicetree/bindings/fsi/fsi-master-ast-cf.txt36
-rw-r--r--Documentation/devicetree/bindings/fsi/fsi-master-gpio.txt28
-rw-r--r--Documentation/devicetree/bindings/fsi/fsi-master-gpio.yaml63
-rw-r--r--Documentation/devicetree/bindings/fsi/ibm,fsi2spi.yaml36
-rw-r--r--Documentation/devicetree/bindings/fsi/ibm,i2cr-fsi-master.yaml5
-rw-r--r--Documentation/devicetree/bindings/fsi/ibm,p9-fsi-controller.yaml45
-rw-r--r--Documentation/devicetree/bindings/fsi/ibm,p9-occ.txt16
-rw-r--r--Documentation/devicetree/bindings/fsi/ibm,p9-occ.yaml40
-rw-r--r--Documentation/devicetree/bindings/fsi/ibm,p9-sbefifo.yaml46
-rw-r--r--Documentation/devicetree/bindings/fsi/ibm,p9-scom.yaml38
-rw-r--r--Documentation/devicetree/bindings/gnss/brcm,bcm4751.yaml1
-rw-r--r--Documentation/devicetree/bindings/gnss/gnss-common.yaml8
-rw-r--r--Documentation/devicetree/bindings/gnss/mediatek.yaml1
-rw-r--r--Documentation/devicetree/bindings/gnss/sirfstar.yaml1
-rw-r--r--Documentation/devicetree/bindings/gnss/u-blox,neo-6m.yaml23
-rw-r--r--Documentation/devicetree/bindings/goldfish/pipe.txt2
-rw-r--r--Documentation/devicetree/bindings/gpio/8xxx_gpio.txt72
-rw-r--r--Documentation/devicetree/bindings/gpio/abilis,tb10x-gpio.txt35
-rw-r--r--Documentation/devicetree/bindings/gpio/abilis,tb10x-gpio.yaml63
-rw-r--r--Documentation/devicetree/bindings/gpio/altr-pio-1.0.yaml75
-rw-r--r--Documentation/devicetree/bindings/gpio/apm,xgene-gpio-sb.yaml94
-rw-r--r--Documentation/devicetree/bindings/gpio/apple,smc-gpio.yaml29
-rw-r--r--Documentation/devicetree/bindings/gpio/aspeed,ast2400-gpio.yaml169
-rw-r--r--Documentation/devicetree/bindings/gpio/aspeed,sgpio.yaml10
-rw-r--r--Documentation/devicetree/bindings/gpio/atmel,at91rm9200-gpio.yaml81
-rw-r--r--Documentation/devicetree/bindings/gpio/blaize,blzp1600-gpio.yaml77
-rw-r--r--Documentation/devicetree/bindings/gpio/brcm,brcmstb-gpio.yaml7
-rw-r--r--Documentation/devicetree/bindings/gpio/brcm,xgs-iproc-gpio.yaml1
-rw-r--r--Documentation/devicetree/bindings/gpio/cavium,octeon-3860-gpio.yaml62
-rw-r--r--Documentation/devicetree/bindings/gpio/cavium-octeon-gpio.txt49
-rw-r--r--Documentation/devicetree/bindings/gpio/cdns,gpio.txt43
-rw-r--r--Documentation/devicetree/bindings/gpio/cdns,gpio.yaml84
-rw-r--r--Documentation/devicetree/bindings/gpio/cirrus,clps711x-mctrl-gpio.txt17
-rw-r--r--Documentation/devicetree/bindings/gpio/cirrus,clps711x-mctrl-gpio.yaml49
-rw-r--r--Documentation/devicetree/bindings/gpio/exar,xra1403.yaml75
-rw-r--r--Documentation/devicetree/bindings/gpio/fairchild,74hc595.yaml49
-rw-r--r--Documentation/devicetree/bindings/gpio/fcs,fxl6408.yaml58
-rw-r--r--Documentation/devicetree/bindings/gpio/fsl,qoriq-gpio.yaml95
-rw-r--r--Documentation/devicetree/bindings/gpio/fsl-imx-gpio.yaml15
-rw-r--r--Documentation/devicetree/bindings/gpio/gateworks,pld-gpio.txt20
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-74xx-mmio.txt30
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-altera.txt44
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-aspeed.txt39
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-ath79.txt37
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-clps711x.txt28
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-davinci.yaml2
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-dsp-keystone.txt39
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-ep9301.yaml9
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-lp3943.txt37
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-max3191x.txt59
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-max77620.txt25
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-mm-lantiq.txt38
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-mmio.yaml49
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-moxtet.txt18
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-mpc8xxx.txt53
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-mvebu.yaml12
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-mxs.yaml160
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-nmk.txt31
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-palmas.txt27
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-pca9570.yaml53
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-pca95xx.yaml17
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-pisosr.txt34
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-tpic2810.yaml51
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-ts4800.txt20
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-ts4900.txt30
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-twl4030.txt29
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-vf610.yaml12
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-xgene-sb.txt64
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-xgene.txt22
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-xra1403.txt46
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-zevio.txt16
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio.txt12
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio_atmel.txt31
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio_lpc32xx.txt43
-rw-r--r--Documentation/devicetree/bindings/gpio/ibm,ppc4xx-gpio.txt24
-rw-r--r--Documentation/devicetree/bindings/gpio/kontron,sl28cpld-gpio.yaml2
-rw-r--r--Documentation/devicetree/bindings/gpio/lacie,netxbig-gpio-ext.yaml60
-rw-r--r--Documentation/devicetree/bindings/gpio/lantiq,gpio-mm-lantiq.yaml54
-rw-r--r--Documentation/devicetree/bindings/gpio/loongson,ls-gpio.yaml31
-rw-r--r--Documentation/devicetree/bindings/gpio/loongson,ls1x-gpio.yaml49
-rw-r--r--Documentation/devicetree/bindings/gpio/lsi,zevio-gpio.yaml43
-rw-r--r--Documentation/devicetree/bindings/gpio/maxim,max31910.yaml104
-rw-r--r--Documentation/devicetree/bindings/gpio/maxim,max7360-gpio.yaml83
-rw-r--r--Documentation/devicetree/bindings/gpio/maxim,max77759-gpio.yaml44
-rw-r--r--Documentation/devicetree/bindings/gpio/microchip,mpfs-gpio.yaml34
-rw-r--r--Documentation/devicetree/bindings/gpio/microchip,pic32-gpio.txt49
-rw-r--r--Documentation/devicetree/bindings/gpio/microchip,pic32mzda-gpio.yaml71
-rw-r--r--Documentation/devicetree/bindings/gpio/mrvl-gpio.yaml2
-rw-r--r--Documentation/devicetree/bindings/gpio/netxbig-gpio-ext.txt22
-rw-r--r--Documentation/devicetree/bindings/gpio/nintendo,hollywood-gpio.txt26
-rw-r--r--Documentation/devicetree/bindings/gpio/nvidia,tegra186-gpio.yaml5
-rw-r--r--Documentation/devicetree/bindings/gpio/nxp,lpc1850-gpio.txt59
-rw-r--r--Documentation/devicetree/bindings/gpio/nxp,lpc1850-gpio.yaml78
-rw-r--r--Documentation/devicetree/bindings/gpio/nxp,lpc3220-gpio.yaml50
-rw-r--r--Documentation/devicetree/bindings/gpio/nxp,pcf8575.yaml63
-rw-r--r--Documentation/devicetree/bindings/gpio/pisosr-gpio.yaml67
-rw-r--r--Documentation/devicetree/bindings/gpio/pl061-gpio.yaml3
-rw-r--r--Documentation/devicetree/bindings/gpio/qca,ar7100-gpio.yaml60
-rw-r--r--Documentation/devicetree/bindings/gpio/raspberrypi,firmware-gpio.txt30
-rw-r--r--Documentation/devicetree/bindings/gpio/realtek,otto-gpio.yaml8
-rw-r--r--Documentation/devicetree/bindings/gpio/renesas,em-gio.yaml20
-rw-r--r--Documentation/devicetree/bindings/gpio/renesas,rcar-gpio.yaml25
-rw-r--r--Documentation/devicetree/bindings/gpio/rockchip,gpio-bank.yaml3
-rw-r--r--Documentation/devicetree/bindings/gpio/rockchip,rk3328-grf-gpio.yaml50
-rw-r--r--Documentation/devicetree/bindings/gpio/sifive,gpio.yaml6
-rw-r--r--Documentation/devicetree/bindings/gpio/snps,creg-gpio.txt21
-rw-r--r--Documentation/devicetree/bindings/gpio/snps,dw-apb-gpio.yaml4
-rw-r--r--Documentation/devicetree/bindings/gpio/socionext,uniphier-gpio.yaml11
-rw-r--r--Documentation/devicetree/bindings/gpio/spacemit,k1-gpio.yaml96
-rw-r--r--Documentation/devicetree/bindings/gpio/spear_spics.txt49
-rw-r--r--Documentation/devicetree/bindings/gpio/st,nomadik-gpio.yaml96
-rw-r--r--Documentation/devicetree/bindings/gpio/st,spear-spics-gpio.yaml82
-rw-r--r--Documentation/devicetree/bindings/gpio/ti,keystone-dsp-gpio.yaml65
-rw-r--r--Documentation/devicetree/bindings/gpio/ti,twl4030-gpio.yaml61
-rw-r--r--Documentation/devicetree/bindings/gpio/toshiba,gpio-visconti.yaml24
-rw-r--r--Documentation/devicetree/bindings/gpio/trivial-gpio.yaml112
-rw-r--r--Documentation/devicetree/bindings/gpio/xlnx,gpio-xilinx.yaml49
-rw-r--r--Documentation/devicetree/bindings/gpio/xlnx,zynqmp-gpio-modepin.yaml2
-rw-r--r--Documentation/devicetree/bindings/gpu/apple,agx.yaml100
-rw-r--r--Documentation/devicetree/bindings/gpu/arm,mali-bifrost.yaml21
-rw-r--r--Documentation/devicetree/bindings/gpu/arm,mali-midgard.yaml11
-rw-r--r--Documentation/devicetree/bindings/gpu/arm,mali-utgard.yaml3
-rw-r--r--Documentation/devicetree/bindings/gpu/arm,mali-valhall-csf.yaml185
-rw-r--r--Documentation/devicetree/bindings/gpu/aspeed,ast2400-gfx.yaml63
-rw-r--r--Documentation/devicetree/bindings/gpu/aspeed-gfx.txt41
-rw-r--r--Documentation/devicetree/bindings/gpu/brcm,bcm-v3d.yaml90
-rw-r--r--Documentation/devicetree/bindings/gpu/img,powervr-rogue.yaml185
-rw-r--r--Documentation/devicetree/bindings/gpu/img,powervr-sgx.yaml138
-rw-r--r--Documentation/devicetree/bindings/gpu/img,powervr.yaml73
-rw-r--r--Documentation/devicetree/bindings/gpu/nvidia,gk20a.txt115
-rw-r--r--Documentation/devicetree/bindings/gpu/nvidia,gk20a.yaml171
-rw-r--r--Documentation/devicetree/bindings/hwinfo/samsung,exynos-chipid.yaml7
-rw-r--r--Documentation/devicetree/bindings/hwinfo/via,vt8500-scc-id.yaml37
-rw-r--r--Documentation/devicetree/bindings/hwlock/sprd,hwspinlock-r3p0.yaml50
-rw-r--r--Documentation/devicetree/bindings/hwlock/sprd-hwspinlock.txt23
-rw-r--r--Documentation/devicetree/bindings/hwmon/adc128d818.txt38
-rw-r--r--Documentation/devicetree/bindings/hwmon/adi,ad741x.yaml1
-rw-r--r--Documentation/devicetree/bindings/hwmon/adi,adm1177.yaml5
-rw-r--r--Documentation/devicetree/bindings/hwmon/adi,adm1266.yaml2
-rw-r--r--Documentation/devicetree/bindings/hwmon/adi,adm1275.yaml23
-rw-r--r--Documentation/devicetree/bindings/hwmon/adi,ltc2945.yaml5
-rw-r--r--Documentation/devicetree/bindings/hwmon/adi,ltc2947.yaml1
-rw-r--r--Documentation/devicetree/bindings/hwmon/adi,ltc2991.yaml1
-rw-r--r--Documentation/devicetree/bindings/hwmon/adi,ltc2992.yaml2
-rw-r--r--Documentation/devicetree/bindings/hwmon/adi,ltc4282.yaml159
-rw-r--r--Documentation/devicetree/bindings/hwmon/adi,max31827.yaml1
-rw-r--r--Documentation/devicetree/bindings/hwmon/adt7475.yaml40
-rw-r--r--Documentation/devicetree/bindings/hwmon/amphenol,chipcap2.yaml77
-rw-r--r--Documentation/devicetree/bindings/hwmon/apm,xgene-slimpro-hwmon.yaml30
-rw-r--r--Documentation/devicetree/bindings/hwmon/apm-xgene-hwmon.txt14
-rw-r--r--Documentation/devicetree/bindings/hwmon/as370.txt11
-rw-r--r--Documentation/devicetree/bindings/hwmon/aspeed,g6-pwm-tach.yaml74
-rw-r--r--Documentation/devicetree/bindings/hwmon/fan-common.yaml79
-rw-r--r--Documentation/devicetree/bindings/hwmon/g762.txt47
-rw-r--r--Documentation/devicetree/bindings/hwmon/gmt,g762.yaml95
-rw-r--r--Documentation/devicetree/bindings/hwmon/gpio-fan.yaml3
-rw-r--r--Documentation/devicetree/bindings/hwmon/hwmon-common.yaml19
-rw-r--r--Documentation/devicetree/bindings/hwmon/ibm,opal-sensor.yaml37
-rw-r--r--Documentation/devicetree/bindings/hwmon/ibm,p8-occ-hwmon.txt25
-rw-r--r--Documentation/devicetree/bindings/hwmon/ibmpowernv.txt23
-rw-r--r--Documentation/devicetree/bindings/hwmon/kontron,sl28cpld-hwmon.yaml3
-rw-r--r--Documentation/devicetree/bindings/hwmon/lantiq,cputemp.yaml30
-rw-r--r--Documentation/devicetree/bindings/hwmon/lltc,ltc2978.yaml103
-rw-r--r--Documentation/devicetree/bindings/hwmon/lltc,ltc4151.yaml5
-rw-r--r--Documentation/devicetree/bindings/hwmon/lltc,ltc4286.yaml9
-rw-r--r--Documentation/devicetree/bindings/hwmon/lm75.yaml6
-rw-r--r--Documentation/devicetree/bindings/hwmon/lm87.txt30
-rw-r--r--Documentation/devicetree/bindings/hwmon/ltc2978.txt62
-rw-r--r--Documentation/devicetree/bindings/hwmon/ltq-cputemp.txt10
-rw-r--r--Documentation/devicetree/bindings/hwmon/max31785.txt22
-rw-r--r--Documentation/devicetree/bindings/hwmon/max6650.txt28
-rw-r--r--Documentation/devicetree/bindings/hwmon/maxim,max20730.yaml2
-rw-r--r--Documentation/devicetree/bindings/hwmon/maxim,max31790.yaml84
-rw-r--r--Documentation/devicetree/bindings/hwmon/maxim,max6639.yaml91
-rw-r--r--Documentation/devicetree/bindings/hwmon/maxim,max6650.yaml69
-rw-r--r--Documentation/devicetree/bindings/hwmon/microchip,emc2305.yaml111
-rw-r--r--Documentation/devicetree/bindings/hwmon/national,lm90.yaml15
-rw-r--r--Documentation/devicetree/bindings/hwmon/ntc-thermistor.yaml3
-rw-r--r--Documentation/devicetree/bindings/hwmon/nuvoton,nct6775.yaml2
-rw-r--r--Documentation/devicetree/bindings/hwmon/nuvoton,nct7363.yaml65
-rw-r--r--Documentation/devicetree/bindings/hwmon/nuvoton,nct7802.yaml1
-rw-r--r--Documentation/devicetree/bindings/hwmon/pmbus/adi,adp1050.yaml60
-rw-r--r--Documentation/devicetree/bindings/hwmon/pmbus/adi,lt3074.yaml50
-rw-r--r--Documentation/devicetree/bindings/hwmon/pmbus/adi,max17616.yaml52
-rw-r--r--Documentation/devicetree/bindings/hwmon/pmbus/infineon,tda38640.yaml28
-rw-r--r--Documentation/devicetree/bindings/hwmon/pmbus/isil,isl68137.yaml151
-rw-r--r--Documentation/devicetree/bindings/hwmon/pmbus/mps,mp2975.yaml75
-rw-r--r--Documentation/devicetree/bindings/hwmon/pmbus/mps,mpq8785.yaml74
-rw-r--r--Documentation/devicetree/bindings/hwmon/pmbus/ti,lm25066.yaml17
-rw-r--r--Documentation/devicetree/bindings/hwmon/pmbus/ti,tps25990.yaml83
-rw-r--r--Documentation/devicetree/bindings/hwmon/pmbus/ti,ucd90320.yaml15
-rw-r--r--Documentation/devicetree/bindings/hwmon/pmbus/vicor,pli1209bc.yaml62
-rw-r--r--Documentation/devicetree/bindings/hwmon/pwm-fan.txt1
-rw-r--r--Documentation/devicetree/bindings/hwmon/pwm-fan.yaml19
-rw-r--r--Documentation/devicetree/bindings/hwmon/renesas,isl28022.yaml64
-rw-r--r--Documentation/devicetree/bindings/hwmon/sophgo,sg2042-hwmon-mcu.yaml47
-rw-r--r--Documentation/devicetree/bindings/hwmon/st,stts751.yaml41
-rw-r--r--Documentation/devicetree/bindings/hwmon/st,tsc1641.yaml63
-rw-r--r--Documentation/devicetree/bindings/hwmon/stts751.txt15
-rw-r--r--Documentation/devicetree/bindings/hwmon/syna,as370.yaml32
-rw-r--r--Documentation/devicetree/bindings/hwmon/ti,adc128d818.yaml62
-rw-r--r--Documentation/devicetree/bindings/hwmon/ti,ads7828.yaml1
-rw-r--r--Documentation/devicetree/bindings/hwmon/ti,amc6821.yaml108
-rw-r--r--Documentation/devicetree/bindings/hwmon/ti,ina2xx.yaml93
-rw-r--r--Documentation/devicetree/bindings/hwmon/ti,lm87.yaml70
-rw-r--r--Documentation/devicetree/bindings/hwmon/ti,tmp102.yaml9
-rw-r--r--Documentation/devicetree/bindings/hwmon/ti,tmp108.yaml18
-rw-r--r--Documentation/devicetree/bindings/hwmon/ti,tmp513.yaml7
-rw-r--r--Documentation/devicetree/bindings/hwmon/ti,tps23861.yaml7
-rw-r--r--Documentation/devicetree/bindings/hwmon/winbond,w83781d.yaml1
-rw-r--r--Documentation/devicetree/bindings/i2c/amlogic,meson6-i2c.yaml3
-rw-r--r--Documentation/devicetree/bindings/i2c/apm,xgene-slimpro-i2c.yaml36
-rw-r--r--Documentation/devicetree/bindings/i2c/apple,i2c.yaml22
-rw-r--r--Documentation/devicetree/bindings/i2c/aspeed,i2c.yaml5
-rw-r--r--Documentation/devicetree/bindings/i2c/atmel,at91sam-i2c.yaml16
-rw-r--r--Documentation/devicetree/bindings/i2c/brcm,brcmstb-i2c.yaml28
-rw-r--r--Documentation/devicetree/bindings/i2c/google,cros-ec-i2c-tunnel.yaml2
-rw-r--r--Documentation/devicetree/bindings/i2c/hisilicon,hix5hd2-i2c.yaml51
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-demux-pinctrl.yaml115
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-exynos5.yaml9
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-fsi.txt40
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-hix5hd2.txt24
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.yaml6
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-imx.yaml4
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-lpc2k.txt33
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-mpc.yaml2
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-mt65xx.yaml7
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-mux-gpio.yaml3
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-mux-gpmux.yaml1
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-mux-pca954x.yaml30
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-pnx.txt34
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-pxa.yaml2
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-rk3x.yaml7
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-sprd.txt31
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-wmt.txt24
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-xgene-slimpro.txt15
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c.txt151
-rw-r--r--Documentation/devicetree/bindings/i2c/ibm,i2c-fsi.yaml76
-rw-r--r--Documentation/devicetree/bindings/i2c/marvell,mv64xxx-i2c.yaml1
-rw-r--r--Documentation/devicetree/bindings/i2c/microchip,corei2c.yaml4
-rw-r--r--Documentation/devicetree/bindings/i2c/nvidia,tegra186-bpmp-i2c.yaml3
-rw-r--r--Documentation/devicetree/bindings/i2c/nvidia,tegra20-i2c.yaml70
-rw-r--r--Documentation/devicetree/bindings/i2c/nxp,lpc1788-i2c.yaml54
-rw-r--r--Documentation/devicetree/bindings/i2c/nxp,pnx-i2c.yaml49
-rw-r--r--Documentation/devicetree/bindings/i2c/qcom,i2c-cci.yaml97
-rw-r--r--Documentation/devicetree/bindings/i2c/qcom,i2c-geni-qcom.yaml1
-rw-r--r--Documentation/devicetree/bindings/i2c/qcom,i2c-qup.yaml16
-rw-r--r--Documentation/devicetree/bindings/i2c/realtek,rtl9301-i2c.yaml104
-rw-r--r--Documentation/devicetree/bindings/i2c/renesas,iic-emev2.yaml14
-rw-r--r--Documentation/devicetree/bindings/i2c/renesas,rcar-i2c.yaml21
-rw-r--r--Documentation/devicetree/bindings/i2c/renesas,riic.yaml160
-rw-r--r--Documentation/devicetree/bindings/i2c/renesas,rmobile-iic.yaml24
-rw-r--r--Documentation/devicetree/bindings/i2c/samsung,s3c2410-i2c.yaml9
-rw-r--r--Documentation/devicetree/bindings/i2c/snps,designware-i2c.yaml29
-rw-r--r--Documentation/devicetree/bindings/i2c/spacemit,k1-i2c.yaml64
-rw-r--r--Documentation/devicetree/bindings/i2c/sprd,sc9860-i2c.yaml65
-rw-r--r--Documentation/devicetree/bindings/i2c/st,nomadik-i2c.yaml54
-rw-r--r--Documentation/devicetree/bindings/i2c/st,stm32-i2c.yaml70
-rw-r--r--Documentation/devicetree/bindings/i2c/ti,omap4-i2c.yaml66
-rw-r--r--Documentation/devicetree/bindings/i2c/tsd,mule-i2c-mux.yaml67
-rw-r--r--Documentation/devicetree/bindings/i2c/wm,wm8505-i2c.yaml47
-rw-r--r--Documentation/devicetree/bindings/i3c/adi,i3c-master.yaml72
-rw-r--r--Documentation/devicetree/bindings/i3c/aspeed,ast2600-i3c.yaml2
-rw-r--r--Documentation/devicetree/bindings/i3c/cdns,i3c-master.yaml9
-rw-r--r--Documentation/devicetree/bindings/i3c/i3c.yaml11
-rw-r--r--Documentation/devicetree/bindings/i3c/mipi-i3c-hci.yaml2
-rw-r--r--Documentation/devicetree/bindings/i3c/renesas,i3c.yaml185
-rw-r--r--Documentation/devicetree/bindings/i3c/silvaco,i3c-master.yaml45
-rw-r--r--Documentation/devicetree/bindings/i3c/snps,dw-i3c-master.yaml22
-rw-r--r--Documentation/devicetree/bindings/iio/accel/adi,adis16240.yaml4
-rw-r--r--Documentation/devicetree/bindings/iio/accel/adi,adxl313.yaml2
-rw-r--r--Documentation/devicetree/bindings/iio/accel/adi,adxl345.yaml22
-rw-r--r--Documentation/devicetree/bindings/iio/accel/adi,adxl355.yaml2
-rw-r--r--Documentation/devicetree/bindings/iio/accel/adi,adxl372.yaml5
-rw-r--r--Documentation/devicetree/bindings/iio/accel/adi,adxl380.yaml95
-rw-r--r--Documentation/devicetree/bindings/iio/accel/bosch,bma220.yaml9
-rw-r--r--Documentation/devicetree/bindings/iio/accel/bosch,bma255.yaml1
-rw-r--r--Documentation/devicetree/bindings/iio/accel/bosch,bma400.yaml1
-rw-r--r--Documentation/devicetree/bindings/iio/accel/kionix,kx022a.yaml11
-rw-r--r--Documentation/devicetree/bindings/iio/accel/kionix,kxcjk1013.yaml1
-rw-r--r--Documentation/devicetree/bindings/iio/accel/kionix,kxsd9.yaml1
-rw-r--r--Documentation/devicetree/bindings/iio/accel/lis302.txt2
-rw-r--r--Documentation/devicetree/bindings/iio/accel/nxp,fxls8962af.yaml20
-rw-r--r--Documentation/devicetree/bindings/iio/adc/adc.yaml35
-rw-r--r--Documentation/devicetree/bindings/iio/adc/adi,ad4000.yaml246
-rw-r--r--Documentation/devicetree/bindings/iio/adc/adi,ad4030.yaml110
-rw-r--r--Documentation/devicetree/bindings/iio/adc/adi,ad4080.yaml101
-rw-r--r--Documentation/devicetree/bindings/iio/adc/adi,ad4170-4.yaml554
-rw-r--r--Documentation/devicetree/bindings/iio/adc/adi,ad4695.yaml268
-rw-r--r--Documentation/devicetree/bindings/iio/adc/adi,ad4851.yaml155
-rw-r--r--Documentation/devicetree/bindings/iio/adc/adi,ad7091r5.yaml1
-rw-r--r--Documentation/devicetree/bindings/iio/adc/adi,ad7124.yaml37
-rw-r--r--Documentation/devicetree/bindings/iio/adc/adi,ad7173.yaml483
-rw-r--r--Documentation/devicetree/bindings/iio/adc/adi,ad7191.yaml149
-rw-r--r--Documentation/devicetree/bindings/iio/adc/adi,ad7192.yaml138
-rw-r--r--Documentation/devicetree/bindings/iio/adc/adi,ad7380.yaml316
-rw-r--r--Documentation/devicetree/bindings/iio/adc/adi,ad7405.yaml60
-rw-r--r--Documentation/devicetree/bindings/iio/adc/adi,ad7476.yaml163
-rw-r--r--Documentation/devicetree/bindings/iio/adc/adi,ad7606.yaml356
-rw-r--r--Documentation/devicetree/bindings/iio/adc/adi,ad7625.yaml176
-rw-r--r--Documentation/devicetree/bindings/iio/adc/adi,ad7768-1.yaml68
-rw-r--r--Documentation/devicetree/bindings/iio/adc/adi,ad7779.yaml152
-rw-r--r--Documentation/devicetree/bindings/iio/adc/adi,ad7780.yaml11
-rw-r--r--Documentation/devicetree/bindings/iio/adc/adi,ad7944.yaml213
-rw-r--r--Documentation/devicetree/bindings/iio/adc/adi,ad7949.yaml1
-rw-r--r--Documentation/devicetree/bindings/iio/adc/adi,ad9467.yaml7
-rw-r--r--Documentation/devicetree/bindings/iio/adc/adi,ade9000.yaml94
-rw-r--r--Documentation/devicetree/bindings/iio/adc/adi,axi-adc.yaml85
-rw-r--r--Documentation/devicetree/bindings/iio/adc/adi,max14001.yaml89
-rw-r--r--Documentation/devicetree/bindings/iio/adc/allwinner,sun20i-d1-gpadc.yaml9
-rw-r--r--Documentation/devicetree/bindings/iio/adc/amlogic,meson-saradc.yaml5
-rw-r--r--Documentation/devicetree/bindings/iio/adc/aspeed,ast2600-adc.yaml2
-rw-r--r--Documentation/devicetree/bindings/iio/adc/cosmic,10001-adc.yaml1
-rw-r--r--Documentation/devicetree/bindings/iio/adc/gehc,pmc-adc.yaml86
-rw-r--r--Documentation/devicetree/bindings/iio/adc/lltc,ltc2496.yaml1
-rw-r--r--Documentation/devicetree/bindings/iio/adc/maxim,max1238.yaml3
-rw-r--r--Documentation/devicetree/bindings/iio/adc/maxim,max1241.yaml2
-rw-r--r--Documentation/devicetree/bindings/iio/adc/mediatek,mt2701-auxadc.yaml6
-rw-r--r--Documentation/devicetree/bindings/iio/adc/mediatek,mt6359-auxadc.yaml35
-rw-r--r--Documentation/devicetree/bindings/iio/adc/microchip,mcp3911.yaml5
-rw-r--r--Documentation/devicetree/bindings/iio/adc/microchip,pac1921.yaml71
-rw-r--r--Documentation/devicetree/bindings/iio/adc/microchip,pac1934.yaml120
-rw-r--r--Documentation/devicetree/bindings/iio/adc/nuvoton,nct7201.yaml70
-rw-r--r--Documentation/devicetree/bindings/iio/adc/nxp,imx93-adc.yaml13
-rw-r--r--Documentation/devicetree/bindings/iio/adc/nxp,lpc3220-adc.yaml3
-rw-r--r--Documentation/devicetree/bindings/iio/adc/qcom,spmi-rradc.yaml2
-rw-r--r--Documentation/devicetree/bindings/iio/adc/qcom,spmi-vadc.yaml2
-rw-r--r--Documentation/devicetree/bindings/iio/adc/renesas,r9a09g077-adc.yaml135
-rw-r--r--Documentation/devicetree/bindings/iio/adc/renesas,rzg2l-adc.yaml37
-rw-r--r--Documentation/devicetree/bindings/iio/adc/renesas,rzn1-adc.yaml111
-rw-r--r--Documentation/devicetree/bindings/iio/adc/richtek,rtq6056.yaml9
-rw-r--r--Documentation/devicetree/bindings/iio/adc/rockchip-saradc.yaml11
-rw-r--r--Documentation/devicetree/bindings/iio/adc/rohm,bd79104.yaml76
-rw-r--r--Documentation/devicetree/bindings/iio/adc/rohm,bd79112.yaml104
-rw-r--r--Documentation/devicetree/bindings/iio/adc/rohm,bd79124.yaml114
-rw-r--r--Documentation/devicetree/bindings/iio/adc/samsung,exynos-adc.yaml33
-rw-r--r--Documentation/devicetree/bindings/iio/adc/sigma-delta-modulator.yaml25
-rw-r--r--Documentation/devicetree/bindings/iio/adc/sophgo,cv1800b-saradc.yaml83
-rw-r--r--Documentation/devicetree/bindings/iio/adc/sprd,sc2720-adc.yaml17
-rw-r--r--Documentation/devicetree/bindings/iio/adc/st,spear600-adc.yaml69
-rw-r--r--Documentation/devicetree/bindings/iio/adc/st,stm32-adc.yaml111
-rw-r--r--Documentation/devicetree/bindings/iio/adc/st,stm32-dfsdm-adc.yaml130
-rw-r--r--Documentation/devicetree/bindings/iio/adc/ti,adc128s052.yaml1
-rw-r--r--Documentation/devicetree/bindings/iio/adc/ti,ads1015.yaml1
-rw-r--r--Documentation/devicetree/bindings/iio/adc/ti,ads1119.yaml155
-rw-r--r--Documentation/devicetree/bindings/iio/adc/ti,ads1298.yaml79
-rw-r--r--Documentation/devicetree/bindings/iio/adc/ti,ads7138.yaml63
-rw-r--r--Documentation/devicetree/bindings/iio/adc/x-powers,axp209-adc.yaml13
-rw-r--r--Documentation/devicetree/bindings/iio/adc/xlnx,zynqmp-ams.yaml6
-rw-r--r--Documentation/devicetree/bindings/iio/afe/current-sense-amplifier.yaml4
-rw-r--r--Documentation/devicetree/bindings/iio/afe/voltage-divider.yaml12
-rw-r--r--Documentation/devicetree/bindings/iio/amplifiers/adi,hmc425a.yaml47
-rw-r--r--Documentation/devicetree/bindings/iio/chemical/bosch,bme680.yaml62
-rw-r--r--Documentation/devicetree/bindings/iio/chemical/sciosense,ens160.yaml70
-rw-r--r--Documentation/devicetree/bindings/iio/chemical/winsen,mhz19b.yaml33
-rw-r--r--Documentation/devicetree/bindings/iio/dac/adi,ad3530r.yaml100
-rw-r--r--Documentation/devicetree/bindings/iio/dac/adi,ad3552r.yaml56
-rw-r--r--Documentation/devicetree/bindings/iio/dac/adi,ad5380.yaml18
-rw-r--r--Documentation/devicetree/bindings/iio/dac/adi,ad5446.yaml138
-rw-r--r--Documentation/devicetree/bindings/iio/dac/adi,ad5686.yaml53
-rw-r--r--Documentation/devicetree/bindings/iio/dac/adi,ad5696.yaml3
-rw-r--r--Documentation/devicetree/bindings/iio/dac/adi,ad5770r.yaml3
-rw-r--r--Documentation/devicetree/bindings/iio/dac/adi,ad5791.yaml39
-rw-r--r--Documentation/devicetree/bindings/iio/dac/adi,ad7293.yaml2
-rw-r--r--Documentation/devicetree/bindings/iio/dac/adi,ad8460.yaml164
-rw-r--r--Documentation/devicetree/bindings/iio/dac/adi,ad9739a.yaml95
-rw-r--r--Documentation/devicetree/bindings/iio/dac/adi,axi-dac.yaml125
-rw-r--r--Documentation/devicetree/bindings/iio/dac/adi,ltc2664.yaml181
-rw-r--r--Documentation/devicetree/bindings/iio/dac/adi,ltc2672.yaml160
-rw-r--r--Documentation/devicetree/bindings/iio/dac/dac.yaml50
-rw-r--r--Documentation/devicetree/bindings/iio/dac/microchip,mcp4821.yaml2
-rw-r--r--Documentation/devicetree/bindings/iio/dac/rohm,bd79703.yaml81
-rw-r--r--Documentation/devicetree/bindings/iio/dac/st,stm32-dac.yaml4
-rw-r--r--Documentation/devicetree/bindings/iio/dac/ti,dac5571.yaml1
-rw-r--r--Documentation/devicetree/bindings/iio/filter/adi,admv8818.yaml20
-rw-r--r--Documentation/devicetree/bindings/iio/frequency/adf4371.yaml8
-rw-r--r--Documentation/devicetree/bindings/iio/frequency/adi,adf4350.yaml6
-rw-r--r--Documentation/devicetree/bindings/iio/frequency/adi,adf4377.yaml10
-rw-r--r--Documentation/devicetree/bindings/iio/frequency/adi,admfm2000.yaml127
-rw-r--r--Documentation/devicetree/bindings/iio/frequency/adi,admv4420.yaml1
-rw-r--r--Documentation/devicetree/bindings/iio/gyroscope/bosch,bmg160.yaml8
-rw-r--r--Documentation/devicetree/bindings/iio/gyroscope/invensense,itg3200.yaml59
-rw-r--r--Documentation/devicetree/bindings/iio/gyroscope/invensense,mpu3050.yaml2
-rw-r--r--Documentation/devicetree/bindings/iio/health/maxim,max30100.yaml8
-rw-r--r--Documentation/devicetree/bindings/iio/health/maxim,max30102.yaml14
-rw-r--r--Documentation/devicetree/bindings/iio/humidity/sciosense,ens210.yaml55
-rw-r--r--Documentation/devicetree/bindings/iio/humidity/ti,hdc2010.yaml3
-rw-r--r--Documentation/devicetree/bindings/iio/humidity/ti,hdc3020.yaml8
-rw-r--r--Documentation/devicetree/bindings/iio/imu/adi,adis16460.yaml1
-rw-r--r--Documentation/devicetree/bindings/iio/imu/adi,adis16475.yaml31
-rw-r--r--Documentation/devicetree/bindings/iio/imu/adi,adis16480.yaml39
-rw-r--r--Documentation/devicetree/bindings/iio/imu/adi,adis16550.yaml74
-rw-r--r--Documentation/devicetree/bindings/iio/imu/bosch,bmi160.yaml7
-rw-r--r--Documentation/devicetree/bindings/iio/imu/bosch,bmi270.yaml80
-rw-r--r--Documentation/devicetree/bindings/iio/imu/bosch,bmi323.yaml1
-rw-r--r--Documentation/devicetree/bindings/iio/imu/bosch,smi240.yaml51
-rw-r--r--Documentation/devicetree/bindings/iio/imu/bosch,smi330.yaml90
-rw-r--r--Documentation/devicetree/bindings/iio/imu/invensense,icm42600.yaml19
-rw-r--r--Documentation/devicetree/bindings/iio/imu/invensense,icm45600.yaml90
-rw-r--r--Documentation/devicetree/bindings/iio/imu/invensense,mpu6050.yaml26
-rw-r--r--Documentation/devicetree/bindings/iio/imu/nxp,fxos8700.yaml2
-rw-r--r--Documentation/devicetree/bindings/iio/imu/st,lsm6dsx.yaml4
-rw-r--r--Documentation/devicetree/bindings/iio/light/ams,as73211.yaml7
-rw-r--r--Documentation/devicetree/bindings/iio/light/avago,apds9300.yaml20
-rw-r--r--Documentation/devicetree/bindings/iio/light/avago,apds9960.yaml44
-rw-r--r--Documentation/devicetree/bindings/iio/light/bh1750.yaml6
-rw-r--r--Documentation/devicetree/bindings/iio/light/brcm,apds9160.yaml78
-rw-r--r--Documentation/devicetree/bindings/iio/light/dynaimage,al3010.yaml8
-rw-r--r--Documentation/devicetree/bindings/iio/light/dynaimage,al3320a.yaml2
-rw-r--r--Documentation/devicetree/bindings/iio/light/liteon,ltrf216a.yaml4
-rw-r--r--Documentation/devicetree/bindings/iio/light/rohm,bh1745.yaml53
-rw-r--r--Documentation/devicetree/bindings/iio/light/rohm,bu27008.yaml49
-rw-r--r--Documentation/devicetree/bindings/iio/light/rohm,bu27010.yaml50
-rw-r--r--Documentation/devicetree/bindings/iio/light/rohm,bu27034.yaml46
-rw-r--r--Documentation/devicetree/bindings/iio/light/rohm,bu27034anuc.yaml45
-rw-r--r--Documentation/devicetree/bindings/iio/light/st,vl6180.yaml1
-rw-r--r--Documentation/devicetree/bindings/iio/light/stk33xx.yaml13
-rw-r--r--Documentation/devicetree/bindings/iio/light/ti,opt3001.yaml4
-rw-r--r--Documentation/devicetree/bindings/iio/light/ti,opt4060.yaml51
-rw-r--r--Documentation/devicetree/bindings/iio/light/veml6030.yaml64
-rw-r--r--Documentation/devicetree/bindings/iio/light/vishay,veml6030.yaml107
-rw-r--r--Documentation/devicetree/bindings/iio/light/vishay,veml6046x00.yaml51
-rw-r--r--Documentation/devicetree/bindings/iio/light/vishay,veml6075.yaml29
-rw-r--r--Documentation/devicetree/bindings/iio/magnetometer/allegromicro,als31300.yaml46
-rw-r--r--Documentation/devicetree/bindings/iio/magnetometer/asahi-kasei,ak8975.yaml5
-rw-r--r--Documentation/devicetree/bindings/iio/magnetometer/bosch,bmc150_magn.yaml3
-rw-r--r--Documentation/devicetree/bindings/iio/magnetometer/fsl,mag3110.yaml2
-rw-r--r--Documentation/devicetree/bindings/iio/magnetometer/infineon,tlv493d-a1b6.yaml45
-rw-r--r--Documentation/devicetree/bindings/iio/magnetometer/silabs,si7210.yaml48
-rw-r--r--Documentation/devicetree/bindings/iio/magnetometer/voltafield,af8133j.yaml59
-rw-r--r--Documentation/devicetree/bindings/iio/pressure/aosong,adp810.yaml45
-rw-r--r--Documentation/devicetree/bindings/iio/pressure/bmp085.yaml50
-rw-r--r--Documentation/devicetree/bindings/iio/pressure/fsl,mpl3115.yaml71
-rw-r--r--Documentation/devicetree/bindings/iio/pressure/honeywell,hsc030pa.yaml5
-rw-r--r--Documentation/devicetree/bindings/iio/pressure/honeywell,mprls0025pa.yaml100
-rw-r--r--Documentation/devicetree/bindings/iio/pressure/infineon,dps310.yaml54
-rw-r--r--Documentation/devicetree/bindings/iio/pressure/invensense,icp10100.yaml52
-rw-r--r--Documentation/devicetree/bindings/iio/pressure/murata,zpa2326.yaml1
-rw-r--r--Documentation/devicetree/bindings/iio/pressure/sensirion,sdp500.yaml46
-rw-r--r--Documentation/devicetree/bindings/iio/proximity/awinic,aw96103.yaml61
-rw-r--r--Documentation/devicetree/bindings/iio/proximity/nicera,d3323aa.yaml62
-rw-r--r--Documentation/devicetree/bindings/iio/proximity/semtech,sx9324.yaml1
-rw-r--r--Documentation/devicetree/bindings/iio/proximity/tyhx,hx9023s.yaml93
-rw-r--r--Documentation/devicetree/bindings/iio/st,st-sensors.yaml2
-rw-r--r--Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml25
-rw-r--r--Documentation/devicetree/bindings/iio/temperature/maxim,max31865.yaml20
-rw-r--r--Documentation/devicetree/bindings/iio/temperature/microchip,mcp9600.yaml56
-rw-r--r--Documentation/devicetree/bindings/iio/temperature/ti,tmp006.yaml6
-rw-r--r--Documentation/devicetree/bindings/iio/temperature/ti,tmp117.yaml12
-rw-r--r--Documentation/devicetree/bindings/incomplete-devices.yaml285
-rw-r--r--Documentation/devicetree/bindings/input/adi,adp5588.yaml38
-rw-r--r--Documentation/devicetree/bindings/input/allwinner,sun4i-a10-lradc-keys.yaml5
-rw-r--r--Documentation/devicetree/bindings/input/atmel,captouch.txt36
-rw-r--r--Documentation/devicetree/bindings/input/atmel,captouch.yaml59
-rw-r--r--Documentation/devicetree/bindings/input/atmel,maxtouch.yaml3
-rw-r--r--Documentation/devicetree/bindings/input/awinic,aw86927.yaml48
-rw-r--r--Documentation/devicetree/bindings/input/azoteq,iqs7222.yaml2
-rw-r--r--Documentation/devicetree/bindings/input/cirrus,cs40l50.yaml68
-rw-r--r--Documentation/devicetree/bindings/input/cirrus,ep9307-keypad.yaml87
-rw-r--r--Documentation/devicetree/bindings/input/cypress,cyapa.yaml2
-rw-r--r--Documentation/devicetree/bindings/input/da9062-onkey.txt47
-rw-r--r--Documentation/devicetree/bindings/input/dlg,da7280.txt108
-rw-r--r--Documentation/devicetree/bindings/input/dlg,da7280.yaml248
-rw-r--r--Documentation/devicetree/bindings/input/dlg,da9062-onkey.yaml38
-rw-r--r--Documentation/devicetree/bindings/input/elan,ekth6915.yaml35
-rw-r--r--Documentation/devicetree/bindings/input/goodix,gt7986u-spifw.yaml69
-rw-r--r--Documentation/devicetree/bindings/input/gpio-matrix-keypad.txt49
-rw-r--r--Documentation/devicetree/bindings/input/gpio-matrix-keypad.yaml103
-rw-r--r--Documentation/devicetree/bindings/input/ilitek,ili2901.yaml66
-rw-r--r--Documentation/devicetree/bindings/input/lpc32xx-key.txt34
-rw-r--r--Documentation/devicetree/bindings/input/mediatek,mt6779-keypad.yaml5
-rw-r--r--Documentation/devicetree/bindings/input/mediatek,pmic-keys.yaml4
-rw-r--r--Documentation/devicetree/bindings/input/nxp,lpc3220-key.yaml61
-rw-r--r--Documentation/devicetree/bindings/input/qcom,pm8921-keypad.yaml46
-rw-r--r--Documentation/devicetree/bindings/input/qcom,pm8921-pwrkey.yaml36
-rw-r--r--Documentation/devicetree/bindings/input/qcom,pm8941-pwrkey.yaml42
-rw-r--r--Documentation/devicetree/bindings/input/qcom,pm8xxx-vib.yaml17
-rw-r--r--Documentation/devicetree/bindings/input/rotary-encoder.txt50
-rw-r--r--Documentation/devicetree/bindings/input/rotary-encoder.yaml90
-rw-r--r--Documentation/devicetree/bindings/input/samsung,s3c6410-keypad.yaml121
-rw-r--r--Documentation/devicetree/bindings/input/samsung-keypad.txt77
-rw-r--r--Documentation/devicetree/bindings/input/syna,rmi4.yaml20
-rw-r--r--Documentation/devicetree/bindings/input/tca8418_keypad.txt10
-rw-r--r--Documentation/devicetree/bindings/input/ti,drv266x.yaml1
-rw-r--r--Documentation/devicetree/bindings/input/ti,nspire-keypad.txt60
-rw-r--r--Documentation/devicetree/bindings/input/ti,nspire-keypad.yaml74
-rw-r--r--Documentation/devicetree/bindings/input/ti,tca8418.yaml61
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/ad7879.txt71
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/adi,ad7879.yaml150
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/ads7846.txt106
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/apple,z2-multitouch.yaml70
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/azoteq,iqs7211.yaml4
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/bu21013.txt43
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/colibri-vf50-ts.txt34
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.yaml16
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/eeti,exc3000.yaml54
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/eeti.txt30
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/egalax-ts.txt18
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/ektf2127.txt25
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/elan,ektf2127.yaml58
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/fsl,imx6ul-tsc.yaml111
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/goodix,gt9916.yaml96
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/goodix.yaml8
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/himax,hx83112b.yaml1
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/himax,hx852es.yaml81
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/hynitron,cst816x.yaml65
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/imagis,ist3038c.yaml41
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/imx6ul_tsc.txt38
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/lpc32xx-tsc.txt16
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/max11801-ts.txt17
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/maxim,max11801.yaml46
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/melfas,mms114.yaml6
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/novatek,nvt-ts.yaml62
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/nxp,lpc3220-tsc.yaml43
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/raspberrypi,firmware-ts.txt26
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/resistive-adc-touch.yaml2
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/rohm,bu21013.yaml95
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/semtech,sx8654.yaml52
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/silead,gsl1680.yaml2
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/sitronix,st1232.yaml29
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/st,stmfts.yaml2
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/sx8654.txt23
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/ti,ads7843.yaml183
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/ti,tsc2007.yaml77
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/toradex,vf50-touchscreen.yaml77
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/touchscreen.txt1
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/touchscreen.yaml123
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/tsc2007.txt39
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/zeitec,zet6223.yaml62
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/zet6223.txt30
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/zinitix,bt400.yaml10
-rw-r--r--Documentation/devicetree/bindings/input/twl4030-pwrbutton.txt21
-rw-r--r--Documentation/devicetree/bindings/input/zii,rave-sp-pwrbutton.txt22
-rw-r--r--Documentation/devicetree/bindings/input/zii,rave-sp-pwrbutton.yaml36
-rw-r--r--Documentation/devicetree/bindings/interconnect/mediatek,cci.yaml11
-rw-r--r--Documentation/devicetree/bindings/interconnect/mediatek,mt8183-emi.yaml51
-rw-r--r--Documentation/devicetree/bindings/interconnect/qcom,glymur-rpmh.yaml172
-rw-r--r--Documentation/devicetree/bindings/interconnect/qcom,kaanapali-rpmh.yaml124
-rw-r--r--Documentation/devicetree/bindings/interconnect/qcom,milos-rpmh.yaml136
-rw-r--r--Documentation/devicetree/bindings/interconnect/qcom,msm8939.yaml33
-rw-r--r--Documentation/devicetree/bindings/interconnect/qcom,msm8953.yaml100
-rw-r--r--Documentation/devicetree/bindings/interconnect/qcom,msm8974.yaml20
-rw-r--r--Documentation/devicetree/bindings/interconnect/qcom,msm8998-bwmon.yaml15
-rw-r--r--Documentation/devicetree/bindings/interconnect/qcom,osm-l3.yaml12
-rw-r--r--Documentation/devicetree/bindings/interconnect/qcom,qcs615-rpmh.yaml73
-rw-r--r--Documentation/devicetree/bindings/interconnect/qcom,qcs8300-rpmh.yaml72
-rw-r--r--Documentation/devicetree/bindings/interconnect/qcom,rpm.yaml15
-rw-r--r--Documentation/devicetree/bindings/interconnect/qcom,rpmh.yaml36
-rw-r--r--Documentation/devicetree/bindings/interconnect/qcom,sa8775p-rpmh.yaml52
-rw-r--r--Documentation/devicetree/bindings/interconnect/qcom,sar2130p-rpmh.yaml117
-rw-r--r--Documentation/devicetree/bindings/interconnect/qcom,sc7280-rpmh.yaml57
-rw-r--r--Documentation/devicetree/bindings/interconnect/qcom,sc8280xp-rpmh.yaml4
-rw-r--r--Documentation/devicetree/bindings/interconnect/qcom,sdx75-rpmh.yaml16
-rw-r--r--Documentation/devicetree/bindings/interconnect/qcom,sm6350-rpmh.yaml65
-rw-r--r--Documentation/devicetree/bindings/interconnect/qcom,sm7150-rpmh.yaml84
-rw-r--r--Documentation/devicetree/bindings/interconnect/qcom,sm8450-rpmh.yaml4
-rw-r--r--Documentation/devicetree/bindings/interconnect/qcom,sm8550-rpmh.yaml2
-rw-r--r--Documentation/devicetree/bindings/interconnect/qcom,sm8650-rpmh.yaml2
-rw-r--r--Documentation/devicetree/bindings/interconnect/qcom,sm8750-rpmh.yaml136
-rw-r--r--Documentation/devicetree/bindings/interconnect/qcom,x1e80100-rpmh.yaml2
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/abilis,tb10x-ictl.txt37
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/abilis,tb10x-ictl.yaml54
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/al,alpine-msix.txt25
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/al,alpine-msix.yaml49
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/allwinner,sun7i-a20-sc-nmi.yaml9
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/altr,msi-controller.yaml65
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/amazon,al-fic.txt27
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/amazon,al-fic.yaml46
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/amlogic,meson-gpio-intc.yaml23
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/andestech,plicsw.yaml54
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/apm,xgene1-msi.yaml54
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/apple,aic.yaml14
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/apple,aic2.yaml1
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.yaml15
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/arm,gic-v5-iwb.yaml78
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/arm,gic-v5.yaml267
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/arm,gic.yaml5
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/arm,nvic.txt36
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/arm,nvic.yaml62
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/arm,versatile-fpga-irq.txt38
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/arm,versatile-fpga-irq.yaml61
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/aspeed,ast2400-i2c-ic.txt25
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/aspeed,ast2400-i2c-ic.yaml46
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/aspeed,ast2400-vic.txt23
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/aspeed,ast2400-vic.yaml62
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/aspeed,ast2500-scu-ic.yaml52
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/aspeed,ast2700-intc.yaml90
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/aspeed,ast2xxx-scu-ic.txt23
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/atmel,aic.txt43
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/atmel,aic.yaml90
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/brcm,bcm2712-msix.yaml60
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/brcm,bcm2835-armctrl-ic.txt131
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/brcm,bcm2835-armctrl-ic.yaml162
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/brcm,bcm2836-l1-intc.txt37
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/brcm,bcm2836-l1-intc.yaml49
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/brcm,bcm6345-l1-intc.txt55
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/brcm,bcm6345-l1-intc.yaml81
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/brcm,bcm7120-l2-intc.yaml30
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/cdns,xtensa-mx.txt18
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/cdns,xtensa-pic.txt25
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/cdns,xtensa-pic.yaml50
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/chrp,open-pic.yaml78
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/cirrus,clps711x-intc.txt41
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/cirrus,ep7209-intc.yaml71
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/cnxt,cx92755-ic.yaml47
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/csky,apb-intc.txt62
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/csky,apb-intc.yaml54
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/csky,mpintc.txt52
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/csky,mpintc.yaml43
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/digicolor-ic.txt21
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/econet,en751221-intc.yaml78
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/ezchip,nps400-ic.txt17
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/ezchip,nps400-ic.yaml34
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/faraday,ftintc010.txt25
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/faraday,ftintc010.yaml51
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/fsl,icoll.yaml45
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/fsl,imx8qxp-dc-intc.yaml318
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/fsl,intmux.yaml3
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/fsl,irqsteer.yaml28
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/fsl,ls-extirq.yaml27
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/fsl,ls-msi.yaml79
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/fsl,ls-scfg-msi.txt30
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/fsl,mpic-msi.yaml161
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/fsl,mu-msi.yaml4
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/fsl,tzic.yaml48
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/fsl,vf610-mscm-ir.yaml62
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/google,goldfish-pic.txt30
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/google,goldfish-pic.yaml47
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/hisilicon,mbigen-v2.txt84
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/hisilicon,mbigen-v2.yaml76
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/img,pdc-intc.txt105
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/img,pdc-intc.yaml79
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/jcore,aic.txt26
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/jcore,aic.yaml43
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/kontron,sl28cpld-intc.yaml2
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/loongson,liointc.yaml1
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/lsi,zevio-intc.txt18
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/lsi,zevio-intc.yaml43
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/marvell,ap806-gicp.yaml50
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/marvell,ap806-sei.yaml58
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/marvell,armada-370-xp-mpic.txt38
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/marvell,armada-8k-pic.txt25
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/marvell,armada-8k-pic.yaml52
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/marvell,cp110-icu.yaml101
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/marvell,gicp.txt27
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/marvell,icu.txt112
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/marvell,mpic.yaml63
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/marvell,odmi-controller.txt42
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/marvell,odmi-controller.yaml54
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/marvell,orion-bridge-intc.yaml52
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/marvell,orion-intc.txt48
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/marvell,sei.txt36
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/mediatek,mt6577-sysirq.yaml86
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/mediatek,mtk-cirq.yaml1
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/mediatek,sysirq.txt44
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/microchip,lan966x-oic.yaml54
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/microchip,pic32-evic.txt67
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/microchip,pic32mzda-evic.yaml60
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/mscc,ocelot-icpu-intr.yaml1
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/nvidia,tegra20-ictlr.txt41
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/nvidia,tegra20-ictlr.yaml82
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/nxp,lpc3220-mic.txt58
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/nxp,lpc3220-mic.yaml68
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/open-pic.txt97
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/opencores,or1k-pic.txt23
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/opencores,or1k-pic.yaml38
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/openrisc,ompic.txt22
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/openrisc,ompic.yaml45
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/qca,ar7100-cpu-intc.yaml61
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/qca,ar7100-misc-intc.yaml52
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/qca,ath79-cpu-intc.txt44
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/qca,ath79-misc-intc.txt45
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/qcom,pdc.yaml7
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/realtek,rtl-intc.yaml20
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.yaml1
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/renesas,rzg2l-irqc.yaml61
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/renesas,rzv2h-icu.yaml280
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/riscv,aplic.yaml180
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/riscv,cpu-intc.txt52
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/riscv,cpu-intc.yaml73
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/riscv,imsics.yaml172
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/riscv,rpmi-mpxy-system-msi.yaml67
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/riscv,rpmi-system-msi.yaml74
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/sifive,plic-1.0.0.yaml11
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/snps,arc700-intc.txt24
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/snps,arc700-intc.yaml42
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/snps,archs-idu-intc.txt46
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/snps,archs-idu-intc.yaml48
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/snps,archs-intc.txt22
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/snps,archs-intc.yaml48
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/snps,dw-apb-ictl.txt43
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/snps,dw-apb-ictl.yaml64
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/sophgo,sg2042-msi.yaml63
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/st,spear300-shirq.yaml67
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/st,spear3xx-shirq.txt44
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/st,stm32-exti.yaml17
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/starfive,jh8100-intc.yaml61
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/technologic,ts4800-irqc.yaml49
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/technologic,ts4800.txt14
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/thead,c900-aclint-mswi.yaml16
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/thead,c900-aclint-sswi.yaml108
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/ti,cp-intc.txt27
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/ti,cp-intc.yaml50
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/ti,keystone-irq.txt36
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/ti,keystone-irq.yaml63
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/ti,omap-intc-irq.txt28
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/ti,omap-intc-irq.yaml52
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/ti,omap2-intc.txt27
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/ti,omap4-wugen-mpu31
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/ti,omap4-wugen-mpu.yaml55
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/ti,sci-inta.yaml1
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/via,vt8500-intc.txt16
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/via,vt8500-intc.yaml76
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/xlnx,intc.yaml82
-rw-r--r--Documentation/devicetree/bindings/iommu/allwinner,sun50i-h6-iommu.yaml7
-rw-r--r--Documentation/devicetree/bindings/iommu/apple,dart.yaml14
-rw-r--r--Documentation/devicetree/bindings/iommu/apple,sart.yaml5
-rw-r--r--Documentation/devicetree/bindings/iommu/arm,smmu.yaml73
-rw-r--r--Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml14
-rw-r--r--Documentation/devicetree/bindings/iommu/msm,iommu-v0.txt64
-rw-r--r--Documentation/devicetree/bindings/iommu/qcom,apq8064-iommu.yaml78
-rw-r--r--Documentation/devicetree/bindings/iommu/qcom,iommu.yaml9
-rw-r--r--Documentation/devicetree/bindings/iommu/qcom,tbu.yaml69
-rw-r--r--Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.yaml1
-rw-r--r--Documentation/devicetree/bindings/iommu/riscv,iommu.yaml147
-rw-r--r--Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml1
-rw-r--r--Documentation/devicetree/bindings/ipmi/aspeed,ast2400-ibt-bmc.txt28
-rw-r--r--Documentation/devicetree/bindings/ipmi/aspeed,ast2400-ibt-bmc.yaml44
-rw-r--r--Documentation/devicetree/bindings/ipmi/aspeed,ast2400-kcs-bmc.yaml3
-rw-r--r--Documentation/devicetree/bindings/ipmi/ipmb-dev.yaml56
-rw-r--r--Documentation/devicetree/bindings/ipmi/npcm7xx-kcs-bmc.txt40
-rw-r--r--Documentation/devicetree/bindings/ipmi/nuvoton,npcm750-kcs-bmc.yaml55
-rw-r--r--Documentation/devicetree/bindings/leds/ams,as3645a.txt85
-rw-r--r--Documentation/devicetree/bindings/leds/ams,as3645a.yaml130
-rw-r--r--Documentation/devicetree/bindings/leds/awinic,aw200xx.yaml2
-rw-r--r--Documentation/devicetree/bindings/leds/backlight/apple,dwi-bl.yaml57
-rw-r--r--Documentation/devicetree/bindings/leds/backlight/arc,arc2c0608.yaml108
-rw-r--r--Documentation/devicetree/bindings/leds/backlight/arcxcnn_bl.txt33
-rw-r--r--Documentation/devicetree/bindings/leds/backlight/awinic,aw99706.yaml101
-rw-r--r--Documentation/devicetree/bindings/leds/backlight/kinetic,ktd2801.yaml46
-rw-r--r--Documentation/devicetree/bindings/leds/backlight/led-backlight.yaml6
-rw-r--r--Documentation/devicetree/bindings/leds/backlight/qcom-wled.yaml4
-rw-r--r--Documentation/devicetree/bindings/leds/backlight/ti,lm3509.yaml136
-rw-r--r--Documentation/devicetree/bindings/leds/backlight/ti,lp8864.yaml80
-rw-r--r--Documentation/devicetree/bindings/leds/backlight/zii,rave-sp-backlight.txt23
-rw-r--r--Documentation/devicetree/bindings/leds/backlight/zii,rave-sp-backlight.yaml36
-rw-r--r--Documentation/devicetree/bindings/leds/common.yaml38
-rw-r--r--Documentation/devicetree/bindings/leds/cznic,turris-omnia-leds.yaml8
-rw-r--r--Documentation/devicetree/bindings/leds/issi,is31fl319x.yaml1
-rw-r--r--Documentation/devicetree/bindings/leds/leds-bcm63138.yaml15
-rw-r--r--Documentation/devicetree/bindings/leds/leds-bcm6328.yaml4
-rw-r--r--Documentation/devicetree/bindings/leds/leds-bcm6358.txt2
-rw-r--r--Documentation/devicetree/bindings/leds/leds-class-multicolor.yaml2
-rw-r--r--Documentation/devicetree/bindings/leds/leds-consumer.yaml67
-rw-r--r--Documentation/devicetree/bindings/leds/leds-group-multicolor.yaml5
-rw-r--r--Documentation/devicetree/bindings/leds/leds-lm3692x.txt65
-rw-r--r--Documentation/devicetree/bindings/leds/leds-lp50xx.yaml19
-rw-r--r--Documentation/devicetree/bindings/leds/leds-lp55xx.yaml11
-rw-r--r--Documentation/devicetree/bindings/leds/leds-lp8860.txt50
-rw-r--r--Documentation/devicetree/bindings/leds/leds-mt6323.txt63
-rw-r--r--Documentation/devicetree/bindings/leds/leds-mt6360.yaml199
-rw-r--r--Documentation/devicetree/bindings/leds/leds-pca955x.txt89
-rw-r--r--Documentation/devicetree/bindings/leds/leds-pwm-multicolor.yaml4
-rw-r--r--Documentation/devicetree/bindings/leds/leds-pwm.yaml14
-rw-r--r--Documentation/devicetree/bindings/leds/leds-qcom-lpg.yaml117
-rw-r--r--Documentation/devicetree/bindings/leds/leds-sc27xx-bltc.txt43
-rw-r--r--Documentation/devicetree/bindings/leds/leds-tlc591xx.txt40
-rw-r--r--Documentation/devicetree/bindings/leds/nxp,pca955x.yaml158
-rw-r--r--Documentation/devicetree/bindings/leds/nxp,pca963x.yaml140
-rw-r--r--Documentation/devicetree/bindings/leds/nxp,pca995x.yaml6
-rw-r--r--Documentation/devicetree/bindings/leds/onnn,ncp5623.yaml98
-rw-r--r--Documentation/devicetree/bindings/leds/pca963x.txt52
-rw-r--r--Documentation/devicetree/bindings/leds/qcom,pm8058-led.yaml2
-rw-r--r--Documentation/devicetree/bindings/leds/qcom,spmi-flash-led.yaml2
-rw-r--r--Documentation/devicetree/bindings/leds/silergy,sy7802.yaml100
-rw-r--r--Documentation/devicetree/bindings/leds/sprd,sc2731-bltc.yaml53
-rw-r--r--Documentation/devicetree/bindings/leds/st,led1202.yaml132
-rw-r--r--Documentation/devicetree/bindings/leds/ti,lp8860.yaml90
-rw-r--r--Documentation/devicetree/bindings/leds/ti,tlc59116.yaml90
-rw-r--r--Documentation/devicetree/bindings/leds/ti,tps61310.yaml120
-rw-r--r--Documentation/devicetree/bindings/leds/ti.lm36922.yaml110
-rw-r--r--Documentation/devicetree/bindings/mailbox/allwinner,sun6i-a31-msgbox.yaml14
-rw-r--r--Documentation/devicetree/bindings/mailbox/amlogic,meson-gxbb-mhu.yaml10
-rw-r--r--Documentation/devicetree/bindings/mailbox/apm,xgene-slimpro-mbox.yaml62
-rw-r--r--Documentation/devicetree/bindings/mailbox/apple,mailbox.yaml24
-rw-r--r--Documentation/devicetree/bindings/mailbox/arm,mhu.yaml1
-rw-r--r--Documentation/devicetree/bindings/mailbox/arm,mhuv2.yaml3
-rw-r--r--Documentation/devicetree/bindings/mailbox/arm,mhuv3.yaml224
-rw-r--r--Documentation/devicetree/bindings/mailbox/aspeed,ast2700-mailbox.yaml68
-rw-r--r--Documentation/devicetree/bindings/mailbox/brcm,bcm74110-mbox.yaml64
-rw-r--r--Documentation/devicetree/bindings/mailbox/brcm,iproc-flexrm-mbox.txt59
-rw-r--r--Documentation/devicetree/bindings/mailbox/brcm,iproc-flexrm-mbox.yaml63
-rw-r--r--Documentation/devicetree/bindings/mailbox/brcm,iproc-pdc-mbox.txt25
-rw-r--r--Documentation/devicetree/bindings/mailbox/brcm,iproc-pdc-mbox.yaml66
-rw-r--r--Documentation/devicetree/bindings/mailbox/cix,sky1-mbox.yaml77
-rw-r--r--Documentation/devicetree/bindings/mailbox/fsl,mu.yaml63
-rw-r--r--Documentation/devicetree/bindings/mailbox/google,gs101-mbox.yaml69
-rw-r--r--Documentation/devicetree/bindings/mailbox/marvell,armada-3700-rwtm-mailbox.txt16
-rw-r--r--Documentation/devicetree/bindings/mailbox/marvell,armada-3700-rwtm-mailbox.yaml42
-rw-r--r--Documentation/devicetree/bindings/mailbox/mediatek,gce-mailbox.yaml15
-rw-r--r--Documentation/devicetree/bindings/mailbox/mediatek,gce-props.yaml52
-rw-r--r--Documentation/devicetree/bindings/mailbox/mediatek,mt8196-gpueb-mbox.yaml64
-rw-r--r--Documentation/devicetree/bindings/mailbox/microchip,mpfs-mailbox.yaml13
-rw-r--r--Documentation/devicetree/bindings/mailbox/microchip,sbi-ipc.yaml123
-rw-r--r--Documentation/devicetree/bindings/mailbox/mtk,adsp-mbox.yaml11
-rw-r--r--Documentation/devicetree/bindings/mailbox/nvidia,tegra186-hsp.yaml37
-rw-r--r--Documentation/devicetree/bindings/mailbox/qcom,apcs-kpss-global.yaml194
-rw-r--r--Documentation/devicetree/bindings/mailbox/qcom,cpucp-mbox.yaml54
-rw-r--r--Documentation/devicetree/bindings/mailbox/qcom-ipcc.yaml6
-rw-r--r--Documentation/devicetree/bindings/mailbox/riscv,rpmi-shmem-mbox.yaml124
-rw-r--r--Documentation/devicetree/bindings/mailbox/riscv,sbi-mpxy-mbox.yaml51
-rw-r--r--Documentation/devicetree/bindings/mailbox/rockchip,rk3368-mailbox.yaml56
-rw-r--r--Documentation/devicetree/bindings/mailbox/rockchip-mailbox.txt32
-rw-r--r--Documentation/devicetree/bindings/mailbox/sophgo,cv1800b-mailbox.yaml60
-rw-r--r--Documentation/devicetree/bindings/mailbox/thead,th1520-mbox.yaml89
-rw-r--r--Documentation/devicetree/bindings/mailbox/ti,omap-mailbox.yaml10
-rw-r--r--Documentation/devicetree/bindings/mailbox/ti,secure-proxy.yaml18
-rw-r--r--Documentation/devicetree/bindings/mailbox/xgene-slimpro-mailbox.txt35
-rw-r--r--Documentation/devicetree/bindings/mailbox/xlnx,zynqmp-ipi-mailbox.yaml2
-rw-r--r--Documentation/devicetree/bindings/media/allwinner,sun50i-h6-vpu-g2.yaml1
-rw-r--r--Documentation/devicetree/bindings/media/allwinner,sun6i-a31-csi.yaml2
-rw-r--r--Documentation/devicetree/bindings/media/allwinner,sun6i-a31-isp.yaml2
-rw-r--r--Documentation/devicetree/bindings/media/allwinner,sun6i-a31-mipi-csi2.yaml2
-rw-r--r--Documentation/devicetree/bindings/media/amlogic,c3-isp.yaml88
-rw-r--r--Documentation/devicetree/bindings/media/amlogic,c3-mipi-adapter.yaml111
-rw-r--r--Documentation/devicetree/bindings/media/amlogic,c3-mipi-csi2.yaml127
-rw-r--r--Documentation/devicetree/bindings/media/amlogic,gx-vdec.yaml3
-rw-r--r--Documentation/devicetree/bindings/media/amlogic,meson-ir-tx.yaml1
-rw-r--r--Documentation/devicetree/bindings/media/amphion,vpu.yaml4
-rw-r--r--Documentation/devicetree/bindings/media/arm,mali-c55.yaml86
-rw-r--r--Documentation/devicetree/bindings/media/aspeed,video-engine.yaml70
-rw-r--r--Documentation/devicetree/bindings/media/aspeed-video.txt33
-rw-r--r--Documentation/devicetree/bindings/media/brcm,bcm2835-unicam.yaml127
-rw-r--r--Documentation/devicetree/bindings/media/cdns,csi2rx.yaml8
-rw-r--r--Documentation/devicetree/bindings/media/cec/cec-common.yaml2
-rw-r--r--Documentation/devicetree/bindings/media/cec/cec-gpio.yaml2
-rw-r--r--Documentation/devicetree/bindings/media/cec/nvidia,tegra114-cec.yaml16
-rw-r--r--Documentation/devicetree/bindings/media/cec/st,stm32-cec.yaml4
-rw-r--r--Documentation/devicetree/bindings/media/fsl,imx-capture-subsystem.yaml37
-rw-r--r--Documentation/devicetree/bindings/media/fsl,imx6-mipi-csi2.yaml143
-rw-r--r--Documentation/devicetree/bindings/media/fsl,imx6q-vdoa.yaml42
-rw-r--r--Documentation/devicetree/bindings/media/fsl,imx6ull-pxp.yaml1
-rw-r--r--Documentation/devicetree/bindings/media/fsl,imx8qm-isi.yaml117
-rw-r--r--Documentation/devicetree/bindings/media/fsl,imx8qxp-isi.yaml106
-rw-r--r--Documentation/devicetree/bindings/media/fsl-vdoa.txt21
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ad5820.txt28
-rw-r--r--Documentation/devicetree/bindings/media/i2c/adi,ad5820.yaml56
-rw-r--r--Documentation/devicetree/bindings/media/i2c/adi,adp1653.txt (renamed from Documentation/devicetree/bindings/media/i2c/adp1653.txt)0
-rw-r--r--Documentation/devicetree/bindings/media/i2c/adi,adv7180.yaml189
-rw-r--r--Documentation/devicetree/bindings/media/i2c/adi,adv7343.txt (renamed from Documentation/devicetree/bindings/media/i2c/adv7343.txt)0
-rw-r--r--Documentation/devicetree/bindings/media/i2c/adi,adv748x.yaml212
-rw-r--r--Documentation/devicetree/bindings/media/i2c/adi,adv7604.yaml158
-rw-r--r--Documentation/devicetree/bindings/media/i2c/adv7180.yaml179
-rw-r--r--Documentation/devicetree/bindings/media/i2c/adv748x.yaml212
-rw-r--r--Documentation/devicetree/bindings/media/i2c/adv7604.yaml160
-rw-r--r--Documentation/devicetree/bindings/media/i2c/aptina,mt9v032.txt (renamed from Documentation/devicetree/bindings/media/i2c/mt9v032.txt)0
-rw-r--r--Documentation/devicetree/bindings/media/i2c/dongwoon,dw9719.yaml89
-rw-r--r--Documentation/devicetree/bindings/media/i2c/galaxycore,gc0308.yaml2
-rw-r--r--Documentation/devicetree/bindings/media/i2c/galaxycore,gc05a2.yaml112
-rw-r--r--Documentation/devicetree/bindings/media/i2c/galaxycore,gc08a3.yaml112
-rw-r--r--Documentation/devicetree/bindings/media/i2c/galaxycore,gc2145.yaml2
-rw-r--r--Documentation/devicetree/bindings/media/i2c/hynix,hi846.yaml10
-rw-r--r--Documentation/devicetree/bindings/media/i2c/imx219.yaml109
-rw-r--r--Documentation/devicetree/bindings/media/i2c/imx258.yaml134
-rw-r--r--Documentation/devicetree/bindings/media/i2c/maxim,max2175.txt (renamed from Documentation/devicetree/bindings/media/i2c/max2175.txt)0
-rw-r--r--Documentation/devicetree/bindings/media/i2c/maxim,max96712.yaml5
-rw-r--r--Documentation/devicetree/bindings/media/i2c/maxim,max96714.yaml174
-rw-r--r--Documentation/devicetree/bindings/media/i2c/maxim,max96717.yaml157
-rw-r--r--Documentation/devicetree/bindings/media/i2c/micron,mt9m111.txt (renamed from Documentation/devicetree/bindings/media/i2c/mt9m111.txt)0
-rw-r--r--Documentation/devicetree/bindings/media/i2c/mipi-ccs.yaml20
-rw-r--r--Documentation/devicetree/bindings/media/i2c/nxp,tda19971.yaml162
-rw-r--r--Documentation/devicetree/bindings/media/i2c/onnn,mt9m001.txt (renamed from Documentation/devicetree/bindings/media/i2c/mt9m001.txt)0
-rw-r--r--Documentation/devicetree/bindings/media/i2c/onnn,mt9m114.yaml9
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ov8856.yaml132
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ovti,og01a1b.yaml107
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ovti,og0ve1b.yaml97
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ovti,ov02a10.yaml3
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ovti,ov02e10.yaml152
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ovti,ov08x40.yaml120
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ovti,ov2640.txt (renamed from Documentation/devicetree/bindings/media/i2c/ov2640.txt)0
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ovti,ov2659.txt (renamed from Documentation/devicetree/bindings/media/i2c/ov2659.txt)0
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ovti,ov2680.yaml35
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ovti,ov2735.yaml108
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ovti,ov5645.yaml6
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ovti,ov5648.yaml11
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ovti,ov6211.yaml96
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ovti,ov7251.yaml6
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ovti,ov7670.txt (renamed from Documentation/devicetree/bindings/media/i2c/ov7670.txt)0
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ovti,ov7740.txt (renamed from Documentation/devicetree/bindings/media/i2c/ov7740.txt)0
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ovti,ov8856.yaml131
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ovti,ov8858.yaml4
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ovti,ov8865.yaml10
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ovti,ov9282.yaml8
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ovti,ov9650.txt (renamed from Documentation/devicetree/bindings/media/i2c/ov9650.txt)0
-rw-r--r--Documentation/devicetree/bindings/media/i2c/samsung,s5k5baf.yaml8
-rw-r--r--Documentation/devicetree/bindings/media/i2c/samsung,s5k6a3.yaml8
-rw-r--r--Documentation/devicetree/bindings/media/i2c/sony,imx111.yaml105
-rw-r--r--Documentation/devicetree/bindings/media/i2c/sony,imx214.yaml31
-rw-r--r--Documentation/devicetree/bindings/media/i2c/sony,imx219.yaml112
-rw-r--r--Documentation/devicetree/bindings/media/i2c/sony,imx258.yaml135
-rw-r--r--Documentation/devicetree/bindings/media/i2c/sony,imx274.yaml4
-rw-r--r--Documentation/devicetree/bindings/media/i2c/sony,imx283.yaml107
-rw-r--r--Documentation/devicetree/bindings/media/i2c/sony,imx290.yaml14
-rw-r--r--Documentation/devicetree/bindings/media/i2c/sony,imx334.yaml8
-rw-r--r--Documentation/devicetree/bindings/media/i2c/sony,imx335.yaml12
-rw-r--r--Documentation/devicetree/bindings/media/i2c/sony,imx412.yaml8
-rw-r--r--Documentation/devicetree/bindings/media/i2c/sony,imx415.yaml4
-rw-r--r--Documentation/devicetree/bindings/media/i2c/st,st-mipid02.yaml2
-rw-r--r--Documentation/devicetree/bindings/media/i2c/st,vd55g1.yaml137
-rw-r--r--Documentation/devicetree/bindings/media/i2c/st,vd56g3.yaml139
-rw-r--r--Documentation/devicetree/bindings/media/i2c/tda1997x.txt178
-rw-r--r--Documentation/devicetree/bindings/media/i2c/techwell,tw9900.yaml3
-rw-r--r--Documentation/devicetree/bindings/media/i2c/thine,thp7312.yaml5
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ti,ds90ub953.yaml77
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ti,ds90ub960.yaml19
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ti,ths8200.txt (renamed from Documentation/devicetree/bindings/media/i2c/ths8200.txt)0
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ti,tvp514x.txt (renamed from Documentation/devicetree/bindings/media/i2c/tvp514x.txt)0
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ti,tvp5150.yaml133
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ti,tvp7002.txt (renamed from Documentation/devicetree/bindings/media/i2c/tvp7002.txt)0
-rw-r--r--Documentation/devicetree/bindings/media/i2c/toshiba,et8ek8.txt8
-rw-r--r--Documentation/devicetree/bindings/media/i2c/toshiba,tc358743.txt (renamed from Documentation/devicetree/bindings/media/i2c/tc358743.txt)0
-rw-r--r--Documentation/devicetree/bindings/media/i2c/tvp5150.txt157
-rw-r--r--Documentation/devicetree/bindings/media/img,e5010-jpeg-enc.yaml75
-rw-r--r--Documentation/devicetree/bindings/media/imx.txt53
-rw-r--r--Documentation/devicetree/bindings/media/mediatek,mdp3-fg.yaml8
-rw-r--r--Documentation/devicetree/bindings/media/mediatek,mdp3-hdr.yaml8
-rw-r--r--Documentation/devicetree/bindings/media/mediatek,mdp3-rdma.yaml1
-rw-r--r--Documentation/devicetree/bindings/media/mediatek,mdp3-rsz.yaml1
-rw-r--r--Documentation/devicetree/bindings/media/mediatek,mdp3-stitch.yaml8
-rw-r--r--Documentation/devicetree/bindings/media/mediatek,mdp3-tcc.yaml8
-rw-r--r--Documentation/devicetree/bindings/media/mediatek,mdp3-tdshp.yaml8
-rw-r--r--Documentation/devicetree/bindings/media/mediatek,mdp3-wrot.yaml1
-rw-r--r--Documentation/devicetree/bindings/media/mediatek,mt7622-cir.yaml55
-rw-r--r--Documentation/devicetree/bindings/media/mediatek,mt8173-mdp.yaml169
-rw-r--r--Documentation/devicetree/bindings/media/mediatek,mt8173-vpu.yaml74
-rw-r--r--Documentation/devicetree/bindings/media/mediatek,vcodec-decoder.yaml1
-rw-r--r--Documentation/devicetree/bindings/media/mediatek,vcodec-encoder.yaml38
-rw-r--r--Documentation/devicetree/bindings/media/mediatek,vcodec-subdev-decoder.yaml101
-rw-r--r--Documentation/devicetree/bindings/media/mediatek-jpeg-decoder.yaml3
-rw-r--r--Documentation/devicetree/bindings/media/mediatek-jpeg-encoder.yaml3
-rw-r--r--Documentation/devicetree/bindings/media/mediatek-mdp.txt95
-rw-r--r--Documentation/devicetree/bindings/media/mediatek-vpu.txt31
-rw-r--r--Documentation/devicetree/bindings/media/microchip,sama5d4-vdec.yaml19
-rw-r--r--Documentation/devicetree/bindings/media/mtk-cir.txt28
-rw-r--r--Documentation/devicetree/bindings/media/nxp,imx-mipi-csi2.yaml18
-rw-r--r--Documentation/devicetree/bindings/media/nxp,imx8-isi.yaml16
-rw-r--r--Documentation/devicetree/bindings/media/nxp,imx8-jpeg.yaml38
-rw-r--r--Documentation/devicetree/bindings/media/nxp,imx8mq-mipi-csi2.yaml38
-rw-r--r--Documentation/devicetree/bindings/media/nxp,imx8mq-vpu.yaml41
-rw-r--r--Documentation/devicetree/bindings/media/qcom,msm8916-camss.yaml9
-rw-r--r--Documentation/devicetree/bindings/media/qcom,msm8916-venus.yaml12
-rw-r--r--Documentation/devicetree/bindings/media/qcom,msm8939-camss.yaml254
-rw-r--r--Documentation/devicetree/bindings/media/qcom,msm8953-camss.yaml337
-rw-r--r--Documentation/devicetree/bindings/media/qcom,msm8996-camss.yaml21
-rw-r--r--Documentation/devicetree/bindings/media/qcom,msm8996-venus.yaml4
-rw-r--r--Documentation/devicetree/bindings/media/qcom,qcm2290-camss.yaml243
-rw-r--r--Documentation/devicetree/bindings/media/qcom,qcm2290-venus.yaml130
-rw-r--r--Documentation/devicetree/bindings/media/qcom,qcs8300-camss.yaml336
-rw-r--r--Documentation/devicetree/bindings/media/qcom,sa8775p-camss.yaml361
-rw-r--r--Documentation/devicetree/bindings/media/qcom,sc7180-venus.yaml19
-rw-r--r--Documentation/devicetree/bindings/media/qcom,sc7280-camss.yaml425
-rw-r--r--Documentation/devicetree/bindings/media/qcom,sc7280-venus.yaml13
-rw-r--r--Documentation/devicetree/bindings/media/qcom,sc8280xp-camss.yaml531
-rw-r--r--Documentation/devicetree/bindings/media/qcom,sdm660-camss.yaml21
-rw-r--r--Documentation/devicetree/bindings/media/qcom,sdm670-camss.yaml318
-rw-r--r--Documentation/devicetree/bindings/media/qcom,sdm845-camss.yaml41
-rw-r--r--Documentation/devicetree/bindings/media/qcom,sdm845-venus-v2.yaml12
-rw-r--r--Documentation/devicetree/bindings/media/qcom,sm8250-camss.yaml59
-rw-r--r--Documentation/devicetree/bindings/media/qcom,sm8250-venus.yaml12
-rw-r--r--Documentation/devicetree/bindings/media/qcom,sm8550-camss.yaml597
-rw-r--r--Documentation/devicetree/bindings/media/qcom,sm8550-iris.yaml199
-rw-r--r--Documentation/devicetree/bindings/media/qcom,sm8650-camss.yaml375
-rw-r--r--Documentation/devicetree/bindings/media/qcom,sm8750-iris.yaml186
-rw-r--r--Documentation/devicetree/bindings/media/qcom,x1e80100-camss.yaml367
-rw-r--r--Documentation/devicetree/bindings/media/raspberrypi,pispbe.yaml63
-rw-r--r--Documentation/devicetree/bindings/media/raspberrypi,rp1-cfe.yaml93
-rw-r--r--Documentation/devicetree/bindings/media/rc.yaml1
-rw-r--r--Documentation/devicetree/bindings/media/renesas,csi2.yaml1
-rw-r--r--Documentation/devicetree/bindings/media/renesas,fcp.yaml28
-rw-r--r--Documentation/devicetree/bindings/media/renesas,isp.yaml67
-rw-r--r--Documentation/devicetree/bindings/media/renesas,r9a09g057-ivc.yaml103
-rw-r--r--Documentation/devicetree/bindings/media/renesas,rzg2l-cru.yaml98
-rw-r--r--Documentation/devicetree/bindings/media/renesas,rzg2l-csi2.yaml61
-rw-r--r--Documentation/devicetree/bindings/media/renesas,vin.yaml4
-rw-r--r--Documentation/devicetree/bindings/media/renesas,vsp1.yaml27
-rw-r--r--Documentation/devicetree/bindings/media/rockchip,px30-vip.yaml124
-rw-r--r--Documentation/devicetree/bindings/media/rockchip,rk3568-vepu.yaml2
-rw-r--r--Documentation/devicetree/bindings/media/rockchip,rk3568-vicap.yaml172
-rw-r--r--Documentation/devicetree/bindings/media/rockchip,vdec.yaml81
-rw-r--r--Documentation/devicetree/bindings/media/rockchip-isp1.yaml60
-rw-r--r--Documentation/devicetree/bindings/media/rockchip-rga.yaml1
-rw-r--r--Documentation/devicetree/bindings/media/rockchip-vpu.yaml38
-rw-r--r--Documentation/devicetree/bindings/media/s5p-mfc.txt0
-rw-r--r--Documentation/devicetree/bindings/media/samsung,exynos4210-csis.yaml2
-rw-r--r--Documentation/devicetree/bindings/media/samsung,exynos4210-fimc.yaml4
-rw-r--r--Documentation/devicetree/bindings/media/samsung,exynos4212-fimc-is.yaml6
-rw-r--r--Documentation/devicetree/bindings/media/samsung,exynos4212-fimc-lite.yaml2
-rw-r--r--Documentation/devicetree/bindings/media/samsung,fimc.yaml5
-rw-r--r--Documentation/devicetree/bindings/media/samsung,s5c73m3.yaml2
-rw-r--r--Documentation/devicetree/bindings/media/samsung,s5pv210-jpeg.yaml1
-rw-r--r--Documentation/devicetree/bindings/media/silabs,si470x.yaml2
-rw-r--r--Documentation/devicetree/bindings/media/snps,dw-hdmi-rx.yaml132
-rw-r--r--Documentation/devicetree/bindings/media/st,stm32-dcmi.yaml4
-rw-r--r--Documentation/devicetree/bindings/media/st,stm32-dcmipp.yaml53
-rw-r--r--Documentation/devicetree/bindings/media/st,stm32-dma2d.yaml1
-rw-r--r--Documentation/devicetree/bindings/media/st,stm32mp25-csi.yaml124
-rw-r--r--Documentation/devicetree/bindings/media/st,stm32mp25-video-codec.yaml53
-rw-r--r--Documentation/devicetree/bindings/media/stih407-c8sectpfe.txt88
-rw-r--r--Documentation/devicetree/bindings/media/ti,j721e-csi2rx-shim.yaml2
-rw-r--r--Documentation/devicetree/bindings/media/video-interface-devices.yaml12
-rw-r--r--Documentation/devicetree/bindings/media/video-interfaces.yaml23
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/arm,pl172.txt127
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/arm,pl172.yaml222
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/brcm,brcmstb-memc-ddr.yaml58
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/exynos-srom.yaml37
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/fsl/fsl,ddr.yaml31
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/fsl/fsl,ifc.yaml34
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/fsl/fsl,imx-weim-peripherals.yaml31
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/fsl/fsl,imx-weim.yaml201
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/fsl/mmdc.yaml4
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/mc-peripheral-props.yaml3
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/mediatek,smi-common.yaml1
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/mediatek,smi-larb.yaml1
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/nvidia,tegra186-mc.yaml84
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/nvidia,tegra20-emc.yaml2
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/nvidia,tegra210-emc.yaml11
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/qca,ath79-ddr-controller.yaml7
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/qcom,ebi2-peripheral-props.yaml90
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/qcom,ebi2.yaml156
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/renesas,rpc-if.yaml5
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/renesas,rzg3e-xspi.yaml142
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/samsung,exynos4210-srom-peripheral-props.yaml35
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/samsung,s5pv210-dmc.yaml33
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/st,stm32-fmc2-ebi.yaml11
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/st,stm32mp25-omm.yaml226
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/starfive,jh7110-dmc.yaml74
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/xlnx,versal-net-ddrmc5.yaml41
-rw-r--r--Documentation/devicetree/bindings/mfd/act8945a.txt82
-rw-r--r--Documentation/devicetree/bindings/mfd/actions,atc260x.yaml6
-rw-r--r--Documentation/devicetree/bindings/mfd/adi,adp5585.yaml313
-rw-r--r--Documentation/devicetree/bindings/mfd/airoha,en7581-gpio-sysctl.yaml90
-rw-r--r--Documentation/devicetree/bindings/mfd/allwinner,sun6i-a31-prcm.yaml14
-rw-r--r--Documentation/devicetree/bindings/mfd/apple,smc.yaml84
-rw-r--r--Documentation/devicetree/bindings/mfd/aspeed,ast2x00-scu.yaml70
-rw-r--r--Documentation/devicetree/bindings/mfd/aspeed-gfx.txt17
-rw-r--r--Documentation/devicetree/bindings/mfd/aspeed-lpc.yaml21
-rw-r--r--Documentation/devicetree/bindings/mfd/atmel,at91sam9260-gpbr.yaml45
-rw-r--r--Documentation/devicetree/bindings/mfd/atmel,at91sam9260-matrix.yaml52
-rw-r--r--Documentation/devicetree/bindings/mfd/atmel,hlcdc.yaml99
-rw-r--r--Documentation/devicetree/bindings/mfd/atmel,sama5d2-flexcom.yaml98
-rw-r--r--Documentation/devicetree/bindings/mfd/atmel-flexcom.txt64
-rw-r--r--Documentation/devicetree/bindings/mfd/atmel-gpbr.txt18
-rw-r--r--Documentation/devicetree/bindings/mfd/atmel-hlcdc.txt56
-rw-r--r--Documentation/devicetree/bindings/mfd/atmel-matrix.txt26
-rw-r--r--Documentation/devicetree/bindings/mfd/brcm,bcm59056.txt39
-rw-r--r--Documentation/devicetree/bindings/mfd/brcm,bcm59056.yaml76
-rw-r--r--Documentation/devicetree/bindings/mfd/brcm,cru.yaml8
-rw-r--r--Documentation/devicetree/bindings/mfd/brcm,iproc-cdru.txt16
-rw-r--r--Documentation/devicetree/bindings/mfd/brcm,iproc-mhb.txt18
-rw-r--r--Documentation/devicetree/bindings/mfd/brcm,misc.yaml2
-rw-r--r--Documentation/devicetree/bindings/mfd/canaan,k210-sysctl.yaml6
-rw-r--r--Documentation/devicetree/bindings/mfd/da9052-i2c.txt67
-rw-r--r--Documentation/devicetree/bindings/mfd/da9062.txt124
-rw-r--r--Documentation/devicetree/bindings/mfd/delta,tn48m-cpld.yaml4
-rw-r--r--Documentation/devicetree/bindings/mfd/dlg,da9052.yaml89
-rw-r--r--Documentation/devicetree/bindings/mfd/dlg,da9063.yaml253
-rw-r--r--Documentation/devicetree/bindings/mfd/fsl,imx8qxp-csr.yaml192
-rw-r--r--Documentation/devicetree/bindings/mfd/fsl,mc13xxx.yaml300
-rw-r--r--Documentation/devicetree/bindings/mfd/fsl,mcu-mpc8349emitx.yaml53
-rw-r--r--Documentation/devicetree/bindings/mfd/gateworks-gsc.yaml193
-rw-r--r--Documentation/devicetree/bindings/mfd/iqs62x.yaml183
-rw-r--r--Documentation/devicetree/bindings/mfd/lp3943.txt2
-rw-r--r--Documentation/devicetree/bindings/mfd/lp873x.txt67
-rw-r--r--Documentation/devicetree/bindings/mfd/marvell,88pm886-a1.yaml80
-rw-r--r--Documentation/devicetree/bindings/mfd/max77650.yaml8
-rw-r--r--Documentation/devicetree/bindings/mfd/maxim,max7360.yaml191
-rw-r--r--Documentation/devicetree/bindings/mfd/maxim,max77686.yaml2
-rw-r--r--Documentation/devicetree/bindings/mfd/maxim,max77693.yaml2
-rw-r--r--Documentation/devicetree/bindings/mfd/maxim,max77705.yaml172
-rw-r--r--Documentation/devicetree/bindings/mfd/maxim,max77759.yaml99
-rw-r--r--Documentation/devicetree/bindings/mfd/mc13xxx.txt156
-rw-r--r--Documentation/devicetree/bindings/mfd/mediatek,mt6357.yaml21
-rw-r--r--Documentation/devicetree/bindings/mfd/mediatek,mt6397.yaml600
-rw-r--r--Documentation/devicetree/bindings/mfd/mediatek,mt8195-scpsys.yaml3
-rw-r--r--Documentation/devicetree/bindings/mfd/mfd.txt15
-rw-r--r--Documentation/devicetree/bindings/mfd/motorola-cpcap.txt8
-rw-r--r--Documentation/devicetree/bindings/mfd/mscc,ocelot.yaml6
-rw-r--r--Documentation/devicetree/bindings/mfd/mt6397.txt110
-rw-r--r--Documentation/devicetree/bindings/mfd/mxs-lradc.txt45
-rw-r--r--Documentation/devicetree/bindings/mfd/mxs-lradc.yaml134
-rw-r--r--Documentation/devicetree/bindings/mfd/netronix,ntxec.yaml39
-rw-r--r--Documentation/devicetree/bindings/mfd/nxp,lpc1850-creg.yaml148
-rw-r--r--Documentation/devicetree/bindings/mfd/nxp,pf1550.yaml161
-rw-r--r--Documentation/devicetree/bindings/mfd/qcom,pm8008.yaml144
-rw-r--r--Documentation/devicetree/bindings/mfd/qcom,spmi-pmic.yaml10
-rw-r--r--Documentation/devicetree/bindings/mfd/qcom,tcsr.yaml11
-rw-r--r--Documentation/devicetree/bindings/mfd/qcom-pm8xxx.yaml1
-rw-r--r--Documentation/devicetree/bindings/mfd/qnap,ts433-mcu.yaml46
-rw-r--r--Documentation/devicetree/bindings/mfd/renesas,r2a11302ft.yaml58
-rw-r--r--Documentation/devicetree/bindings/mfd/richtek,rt4831.yaml4
-rw-r--r--Documentation/devicetree/bindings/mfd/ricoh,rn5t618.yaml6
-rw-r--r--Documentation/devicetree/bindings/mfd/rockchip,rk805.yaml2
-rw-r--r--Documentation/devicetree/bindings/mfd/rockchip,rk806.yaml21
-rw-r--r--Documentation/devicetree/bindings/mfd/rockchip,rk808.yaml2
-rw-r--r--Documentation/devicetree/bindings/mfd/rockchip,rk809.yaml288
-rw-r--r--Documentation/devicetree/bindings/mfd/rockchip,rk816.yaml274
-rw-r--r--Documentation/devicetree/bindings/mfd/rockchip,rk817.yaml70
-rw-r--r--Documentation/devicetree/bindings/mfd/rockchip,rk818.yaml2
-rw-r--r--Documentation/devicetree/bindings/mfd/rohm,bd71815-pmic.yaml22
-rw-r--r--Documentation/devicetree/bindings/mfd/rohm,bd71828-pmic.yaml26
-rw-r--r--Documentation/devicetree/bindings/mfd/rohm,bd71837-pmic.yaml2
-rw-r--r--Documentation/devicetree/bindings/mfd/rohm,bd9571mwv.yaml52
-rw-r--r--Documentation/devicetree/bindings/mfd/rohm,bd9576-pmic.yaml2
-rw-r--r--Documentation/devicetree/bindings/mfd/rohm,bd96801-pmic.yaml177
-rw-r--r--Documentation/devicetree/bindings/mfd/rohm,bd96802-pmic.yaml101
-rw-r--r--Documentation/devicetree/bindings/mfd/samsung,s2dos05.yaml99
-rw-r--r--Documentation/devicetree/bindings/mfd/samsung,s2mpa01.yaml2
-rw-r--r--Documentation/devicetree/bindings/mfd/samsung,s2mps11.yaml56
-rw-r--r--Documentation/devicetree/bindings/mfd/samsung,s5m8767.yaml4
-rw-r--r--Documentation/devicetree/bindings/mfd/silergy,sy7636a.yaml11
-rw-r--r--Documentation/devicetree/bindings/mfd/spacemit,p1.yaml86
-rw-r--r--Documentation/devicetree/bindings/mfd/sprd,sc2731.yaml244
-rw-r--r--Documentation/devicetree/bindings/mfd/sprd,sc27xx-pmic.txt40
-rw-r--r--Documentation/devicetree/bindings/mfd/st,stm32-lptimer.yaml44
-rw-r--r--Documentation/devicetree/bindings/mfd/st,stm32-timers.yaml22
-rw-r--r--Documentation/devicetree/bindings/mfd/st,stmfx.yaml2
-rw-r--r--Documentation/devicetree/bindings/mfd/st,stpmic1.yaml4
-rw-r--r--Documentation/devicetree/bindings/mfd/stericsson,ab8500.yaml49
-rw-r--r--Documentation/devicetree/bindings/mfd/stericsson,db8500-prcmu.yaml40
-rw-r--r--Documentation/devicetree/bindings/mfd/syscon-common.yaml68
-rw-r--r--Documentation/devicetree/bindings/mfd/syscon.yaml222
-rw-r--r--Documentation/devicetree/bindings/mfd/ti,bq25703a.yaml117
-rw-r--r--Documentation/devicetree/bindings/mfd/ti,lp8732.yaml112
-rw-r--r--Documentation/devicetree/bindings/mfd/ti,lp87524-q1.yaml2
-rw-r--r--Documentation/devicetree/bindings/mfd/ti,lp87561-q1.yaml2
-rw-r--r--Documentation/devicetree/bindings/mfd/ti,lp87565-q1.yaml2
-rw-r--r--Documentation/devicetree/bindings/mfd/ti,tps65086.yaml4
-rw-r--r--Documentation/devicetree/bindings/mfd/ti,tps65910.yaml315
-rw-r--r--Documentation/devicetree/bindings/mfd/ti,tps6594.yaml3
-rw-r--r--Documentation/devicetree/bindings/mfd/ti,twl.yaml534
-rw-r--r--Documentation/devicetree/bindings/mfd/tps65910.txt205
-rw-r--r--Documentation/devicetree/bindings/mfd/twl4030-audio.txt46
-rw-r--r--Documentation/devicetree/bindings/mfd/twl4030-power.txt48
-rw-r--r--Documentation/devicetree/bindings/mfd/twl6040.txt2
-rw-r--r--Documentation/devicetree/bindings/mfd/x-powers,axp152.yaml209
-rw-r--r--Documentation/devicetree/bindings/mfd/zii,rave-sp.txt39
-rw-r--r--Documentation/devicetree/bindings/mfd/zii,rave-sp.yaml63
-rw-r--r--Documentation/devicetree/bindings/mips/brcm/soc.yaml74
-rw-r--r--Documentation/devicetree/bindings/mips/cpus.yaml17
-rw-r--r--Documentation/devicetree/bindings/mips/econet.yaml26
-rw-r--r--Documentation/devicetree/bindings/mips/loongson/devices.yaml2
-rw-r--r--Documentation/devicetree/bindings/mips/mobileye.yaml37
-rw-r--r--Documentation/devicetree/bindings/mips/mscc.txt17
-rw-r--r--Documentation/devicetree/bindings/mips/mti,mips-cm.yaml57
-rw-r--r--Documentation/devicetree/bindings/mips/realtek-rtl.yaml4
-rw-r--r--Documentation/devicetree/bindings/misc/aspeed,ast2400-cvic.yaml60
-rw-r--r--Documentation/devicetree/bindings/misc/aspeed,cvic.txt35
-rw-r--r--Documentation/devicetree/bindings/misc/aspeed-p2a-ctrl.txt46
-rw-r--r--Documentation/devicetree/bindings/misc/atmel-ssc.txt50
-rw-r--r--Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt196
-rw-r--r--Documentation/devicetree/bindings/misc/fsl,qoriq-mc.yaml187
-rw-r--r--Documentation/devicetree/bindings/misc/intel,ixp4xx-ahb-queue-manager.yaml6
-rw-r--r--Documentation/devicetree/bindings/misc/lwn,bk4-spi.yaml54
-rw-r--r--Documentation/devicetree/bindings/misc/lwn-bk4.txt26
-rw-r--r--Documentation/devicetree/bindings/misc/nvidia,tegra186-misc.yaml1
-rw-r--r--Documentation/devicetree/bindings/misc/pci1de4,1.yaml137
-rw-r--r--Documentation/devicetree/bindings/misc/qcom,fastrpc.yaml7
-rw-r--r--Documentation/devicetree/bindings/misc/qemu,vcpu-stall-detector.yaml6
-rw-r--r--Documentation/devicetree/bindings/misc/ti,fpc202.yaml94
-rw-r--r--Documentation/devicetree/bindings/misc/xlnx,sd-fec.txt58
-rw-r--r--Documentation/devicetree/bindings/misc/xlnx,sd-fec.yaml140
-rw-r--r--Documentation/devicetree/bindings/mmc/allwinner,sun4i-a10-mmc.yaml38
-rw-r--r--Documentation/devicetree/bindings/mmc/amlogic,meson-gx-mmc.yaml3
-rw-r--r--Documentation/devicetree/bindings/mmc/amlogic,meson-mx-sdio.txt54
-rw-r--r--Documentation/devicetree/bindings/mmc/amlogic,meson-mx-sdio.yaml97
-rw-r--r--Documentation/devicetree/bindings/mmc/arasan,sdhci.yaml22
-rw-r--r--Documentation/devicetree/bindings/mmc/arm,pl18x.yaml4
-rw-r--r--Documentation/devicetree/bindings/mmc/atmel,hsmci.yaml106
-rw-r--r--Documentation/devicetree/bindings/mmc/atmel,sama5d2-sdhci.yaml93
-rw-r--r--Documentation/devicetree/bindings/mmc/atmel-hsmci.txt73
-rw-r--r--Documentation/devicetree/bindings/mmc/brcm,sdhci-brcmstb.yaml5
-rw-r--r--Documentation/devicetree/bindings/mmc/cdns,sdhci.yaml4
-rw-r--r--Documentation/devicetree/bindings/mmc/davinci_mmc.txt32
-rw-r--r--Documentation/devicetree/bindings/mmc/fsl,esdhc.yaml107
-rw-r--r--Documentation/devicetree/bindings/mmc/fsl-esdhc.txt52
-rw-r--r--Documentation/devicetree/bindings/mmc/fsl-imx-esdhc.yaml15
-rw-r--r--Documentation/devicetree/bindings/mmc/fsl-imx-mmc.yaml12
-rw-r--r--Documentation/devicetree/bindings/mmc/hi3798cv200-dw-mshc.txt40
-rw-r--r--Documentation/devicetree/bindings/mmc/hisilicon,hi3798cv200-dw-mshc.yaml97
-rw-r--r--Documentation/devicetree/bindings/mmc/loongson,ls2k0500-mmc.yaml112
-rw-r--r--Documentation/devicetree/bindings/mmc/marvell,xenon-sdhci.yaml69
-rw-r--r--Documentation/devicetree/bindings/mmc/microchip,sdhci-pic32.txt29
-rw-r--r--Documentation/devicetree/bindings/mmc/microchip,sdhci-pic32.yaml66
-rw-r--r--Documentation/devicetree/bindings/mmc/mmc-card.yaml52
-rw-r--r--Documentation/devicetree/bindings/mmc/mmc-controller-common.yaml365
-rw-r--r--Documentation/devicetree/bindings/mmc/mmc-controller.yaml344
-rw-r--r--Documentation/devicetree/bindings/mmc/mmc-slot.yaml48
-rw-r--r--Documentation/devicetree/bindings/mmc/mmc-spi-slot.yaml16
-rw-r--r--Documentation/devicetree/bindings/mmc/mtk-sd.yaml36
-rw-r--r--Documentation/devicetree/bindings/mmc/mxs-mmc.yaml7
-rw-r--r--Documentation/devicetree/bindings/mmc/nuvoton,ma35d1-sdhci.yaml87
-rw-r--r--Documentation/devicetree/bindings/mmc/renesas,sdhi.yaml224
-rw-r--r--Documentation/devicetree/bindings/mmc/rockchip-dw-mshc.yaml5
-rw-r--r--Documentation/devicetree/bindings/mmc/samsung,exynos-dw-mshc.yaml4
-rw-r--r--Documentation/devicetree/bindings/mmc/sdhci-am654.yaml3
-rw-r--r--Documentation/devicetree/bindings/mmc/sdhci-atmel.txt35
-rw-r--r--Documentation/devicetree/bindings/mmc/sdhci-milbeaut.txt30
-rw-r--r--Documentation/devicetree/bindings/mmc/sdhci-msm.yaml14
-rw-r--r--Documentation/devicetree/bindings/mmc/sdhci-omap.txt43
-rw-r--r--Documentation/devicetree/bindings/mmc/sdhci-pxa.yaml49
-rw-r--r--Documentation/devicetree/bindings/mmc/sdhci-sprd.txt67
-rw-r--r--Documentation/devicetree/bindings/mmc/sdhci.txt13
-rw-r--r--Documentation/devicetree/bindings/mmc/snps,dwcmshc-sdhci.yaml150
-rw-r--r--Documentation/devicetree/bindings/mmc/socionext,milbeaut-m10v-sdhci-3.0.yaml79
-rw-r--r--Documentation/devicetree/bindings/mmc/spacemit,sdhci.yaml53
-rw-r--r--Documentation/devicetree/bindings/mmc/sprd,sdhci-r11.yaml112
-rw-r--r--Documentation/devicetree/bindings/mmc/ti,da830-mmc.yaml61
-rw-r--r--Documentation/devicetree/bindings/mmc/ti,omap2430-sdhci.yaml169
-rw-r--r--Documentation/devicetree/bindings/mmc/vt8500-sdmmc.txt23
-rw-r--r--Documentation/devicetree/bindings/mmc/wm,wm8505-sdhc.yaml66
-rw-r--r--Documentation/devicetree/bindings/mtd/allwinner,sun4i-a10-nand.yaml41
-rw-r--r--Documentation/devicetree/bindings/mtd/amlogic,meson-nand.yaml19
-rw-r--r--Documentation/devicetree/bindings/mtd/arasan,nand-controller.yaml2
-rw-r--r--Documentation/devicetree/bindings/mtd/atmel,dataflash.yaml55
-rw-r--r--Documentation/devicetree/bindings/mtd/atmel-dataflash.txt17
-rw-r--r--Documentation/devicetree/bindings/mtd/atmel-nand.txt10
-rw-r--r--Documentation/devicetree/bindings/mtd/brcm,brcmnand.yaml44
-rw-r--r--Documentation/devicetree/bindings/mtd/cadence-nand-controller.txt53
-rw-r--r--Documentation/devicetree/bindings/mtd/cdns,hp-nfc.yaml84
-rw-r--r--Documentation/devicetree/bindings/mtd/davinci-nand.txt94
-rw-r--r--Documentation/devicetree/bindings/mtd/flctl-nand.txt2
-rw-r--r--Documentation/devicetree/bindings/mtd/fsl,vf610-nfc.yaml89
-rw-r--r--Documentation/devicetree/bindings/mtd/fsl-upm-nand.txt2
-rw-r--r--Documentation/devicetree/bindings/mtd/gpio-control-nand.txt2
-rw-r--r--Documentation/devicetree/bindings/mtd/gpmi-nand.yaml31
-rw-r--r--Documentation/devicetree/bindings/mtd/hisi504-nand.txt2
-rw-r--r--Documentation/devicetree/bindings/mtd/jedec,spi-nor.yaml9
-rw-r--r--Documentation/devicetree/bindings/mtd/loongson,ls1b-nand-controller.yaml122
-rw-r--r--Documentation/devicetree/bindings/mtd/marvell,nand-controller.yaml1
-rw-r--r--Documentation/devicetree/bindings/mtd/microchip,mchp48l640.yaml5
-rw-r--r--Documentation/devicetree/bindings/mtd/mtd-physmap.yaml15
-rw-r--r--Documentation/devicetree/bindings/mtd/mtd.yaml6
-rw-r--r--Documentation/devicetree/bindings/mtd/mxc-nand.yaml8
-rw-r--r--Documentation/devicetree/bindings/mtd/nuvoton,ma35d1-nand.yaml95
-rw-r--r--Documentation/devicetree/bindings/mtd/nvidia-tegra20-nand.txt2
-rw-r--r--Documentation/devicetree/bindings/mtd/nxp,lpc1773-spifi.yaml74
-rw-r--r--Documentation/devicetree/bindings/mtd/nxp-spifi.txt58
-rw-r--r--Documentation/devicetree/bindings/mtd/orion-nand.txt2
-rw-r--r--Documentation/devicetree/bindings/mtd/partitions/binman.yaml53
-rw-r--r--Documentation/devicetree/bindings/mtd/partitions/fixed-partitions.yaml2
-rw-r--r--Documentation/devicetree/bindings/mtd/partitions/linux,ubi.yaml75
-rw-r--r--Documentation/devicetree/bindings/mtd/partitions/partition.yaml72
-rw-r--r--Documentation/devicetree/bindings/mtd/partitions/ubi-volume.yaml40
-rw-r--r--Documentation/devicetree/bindings/mtd/qcom,nandc.yaml64
-rw-r--r--Documentation/devicetree/bindings/mtd/realtek,rtl9301-ecc.yaml41
-rw-r--r--Documentation/devicetree/bindings/mtd/samsung,s5pv210-onenand.yaml65
-rw-r--r--Documentation/devicetree/bindings/mtd/samsung-s3c2410.txt56
-rw-r--r--Documentation/devicetree/bindings/mtd/st,stm32-fmc2-nand.yaml25
-rw-r--r--Documentation/devicetree/bindings/mtd/technologic,nand.yaml45
-rw-r--r--Documentation/devicetree/bindings/mtd/ti,davinci-nand.yaml124
-rw-r--r--Documentation/devicetree/bindings/mtd/ti,gpmc-nand.yaml5
-rw-r--r--Documentation/devicetree/bindings/mtd/vf610-nfc.txt59
-rw-r--r--Documentation/devicetree/bindings/mux/gpio-mux.yaml4
-rw-r--r--Documentation/devicetree/bindings/mux/mux-controller.yaml3
-rw-r--r--Documentation/devicetree/bindings/net/adi,adin.yaml2
-rw-r--r--Documentation/devicetree/bindings/net/adi,adin1110.yaml2
-rw-r--r--Documentation/devicetree/bindings/net/aeonsemi,as21xxx.yaml122
-rw-r--r--Documentation/devicetree/bindings/net/airoha,an7583-mdio.yaml59
-rw-r--r--Documentation/devicetree/bindings/net/airoha,en7581-eth.yaml199
-rw-r--r--Documentation/devicetree/bindings/net/airoha,en7581-npu.yaml99
-rw-r--r--Documentation/devicetree/bindings/net/airoha,en8811h.yaml56
-rw-r--r--Documentation/devicetree/bindings/net/allwinner,sun4i-a10-emac.yaml9
-rw-r--r--Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml97
-rw-r--r--Documentation/devicetree/bindings/net/altr,gmii-to-sgmii-2.0.yaml49
-rw-r--r--Documentation/devicetree/bindings/net/altr,socfpga-stmmac.yaml178
-rw-r--r--Documentation/devicetree/bindings/net/amd,xgbe-seattle-v1a.yaml147
-rw-r--r--Documentation/devicetree/bindings/net/amd-xgbe.txt76
-rw-r--r--Documentation/devicetree/bindings/net/amlogic,meson-dwmac.yaml44
-rw-r--r--Documentation/devicetree/bindings/net/apm,xgene-enet.yaml115
-rw-r--r--Documentation/devicetree/bindings/net/apm,xgene-mdio-rgmii.yaml54
-rw-r--r--Documentation/devicetree/bindings/net/apm-xgene-enet.txt91
-rw-r--r--Documentation/devicetree/bindings/net/apm-xgene-mdio.txt37
-rw-r--r--Documentation/devicetree/bindings/net/arc_emac.txt46
-rw-r--r--Documentation/devicetree/bindings/net/asix,ax88178.yaml4
-rw-r--r--Documentation/devicetree/bindings/net/aspeed,ast2600-mdio.yaml7
-rw-r--r--Documentation/devicetree/bindings/net/bluetooth/amlogic,w155s2-bt.yaml63
-rw-r--r--Documentation/devicetree/bindings/net/bluetooth/brcm,bcm4377-bluetooth.yaml1
-rw-r--r--Documentation/devicetree/bindings/net/bluetooth/brcm,bluetooth.yaml163
-rw-r--r--Documentation/devicetree/bindings/net/bluetooth/marvell,88w8897.yaml49
-rw-r--r--Documentation/devicetree/bindings/net/bluetooth/marvell,sd8897-bt.yaml79
-rw-r--r--Documentation/devicetree/bindings/net/bluetooth/mediatek,bluetooth.txt80
-rw-r--r--Documentation/devicetree/bindings/net/bluetooth/mediatek,mt7622-bluetooth.yaml51
-rw-r--r--Documentation/devicetree/bindings/net/bluetooth/mediatek,mt7921s-bluetooth.yaml55
-rw-r--r--Documentation/devicetree/bindings/net/bluetooth/nokia,h4p-bluetooth.txt (renamed from Documentation/devicetree/bindings/net/nokia-bluetooth.txt)0
-rw-r--r--Documentation/devicetree/bindings/net/bluetooth/nxp,88w8987-bt.yaml65
-rw-r--r--Documentation/devicetree/bindings/net/bluetooth/qualcomm-bluetooth.yaml71
-rw-r--r--Documentation/devicetree/bindings/net/bluetooth/realtek,bluetooth.yaml68
-rw-r--r--Documentation/devicetree/bindings/net/bluetooth/ti,bluetooth.yaml (renamed from Documentation/devicetree/bindings/net/ti,bluetooth.yaml)5
-rw-r--r--Documentation/devicetree/bindings/net/brcm,asp-v2.0.yaml27
-rw-r--r--Documentation/devicetree/bindings/net/brcm,bcm7445-switch-v4.0.txt50
-rw-r--r--Documentation/devicetree/bindings/net/brcm,bcmgenet.yaml33
-rw-r--r--Documentation/devicetree/bindings/net/brcm,mdio-mux-iproc.yaml51
-rw-r--r--Documentation/devicetree/bindings/net/brcm,unimac-mdio.yaml4
-rw-r--r--Documentation/devicetree/bindings/net/broadcom-bluetooth.yaml159
-rw-r--r--Documentation/devicetree/bindings/net/btusb.txt2
-rw-r--r--Documentation/devicetree/bindings/net/can/atmel,at91sam9263-can.yaml58
-rw-r--r--Documentation/devicetree/bindings/net/can/atmel-can.txt15
-rw-r--r--Documentation/devicetree/bindings/net/can/bosch,c_can.yaml10
-rw-r--r--Documentation/devicetree/bindings/net/can/bosch,m_can.yaml32
-rw-r--r--Documentation/devicetree/bindings/net/can/fsl,flexcan.yaml62
-rw-r--r--Documentation/devicetree/bindings/net/can/microchip,mcp2510.yaml70
-rw-r--r--Documentation/devicetree/bindings/net/can/microchip,mcp251x.txt30
-rw-r--r--Documentation/devicetree/bindings/net/can/microchip,mcp251xfd.yaml7
-rw-r--r--Documentation/devicetree/bindings/net/can/microchip,mpfs-can.yaml15
-rw-r--r--Documentation/devicetree/bindings/net/can/nxp,sja1000.yaml4
-rw-r--r--Documentation/devicetree/bindings/net/can/renesas,rcar-canfd.yaml171
-rw-r--r--Documentation/devicetree/bindings/net/can/rockchip,rk3568v2-canfd.yaml74
-rw-r--r--Documentation/devicetree/bindings/net/can/st,stm32-bxcan.yaml2
-rw-r--r--Documentation/devicetree/bindings/net/can/tcan4x5x.txt45
-rw-r--r--Documentation/devicetree/bindings/net/can/ti,tcan4x5x.yaml199
-rw-r--r--Documentation/devicetree/bindings/net/can/xilinx,can.yaml7
-rw-r--r--Documentation/devicetree/bindings/net/cdns,macb.yaml47
-rw-r--r--Documentation/devicetree/bindings/net/cirrus,ep9301-eth.yaml59
-rw-r--r--Documentation/devicetree/bindings/net/cortina,gemini-ethernet.yaml1
-rw-r--r--Documentation/devicetree/bindings/net/dsa/ar9331.txt147
-rw-r--r--Documentation/devicetree/bindings/net/dsa/brcm,b53.yaml8
-rw-r--r--Documentation/devicetree/bindings/net/dsa/lantiq,gswip.yaml350
-rw-r--r--Documentation/devicetree/bindings/net/dsa/lantiq-gswip.txt146
-rw-r--r--Documentation/devicetree/bindings/net/dsa/mediatek,mt7530.yaml43
-rw-r--r--Documentation/devicetree/bindings/net/dsa/micrel,ks8995.yaml135
-rw-r--r--Documentation/devicetree/bindings/net/dsa/microchip,ksz.yaml118
-rw-r--r--Documentation/devicetree/bindings/net/dsa/motorcomm,yt921x.yaml167
-rw-r--r--Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml12
-rw-r--r--Documentation/devicetree/bindings/net/dsa/qca,ar9331.yaml161
-rw-r--r--Documentation/devicetree/bindings/net/dsa/realtek.yaml50
-rw-r--r--Documentation/devicetree/bindings/net/dsa/vitesse,vsc73xx.txt129
-rw-r--r--Documentation/devicetree/bindings/net/dsa/vitesse,vsc73xx.yaml194
-rw-r--r--Documentation/devicetree/bindings/net/eswin,eic7700-eth.yaml129
-rw-r--r--Documentation/devicetree/bindings/net/ethernet-controller.yaml134
-rw-r--r--Documentation/devicetree/bindings/net/ethernet-phy-package.yaml52
-rw-r--r--Documentation/devicetree/bindings/net/ethernet-phy.yaml57
-rw-r--r--Documentation/devicetree/bindings/net/ethernet-switch.yaml16
-rw-r--r--Documentation/devicetree/bindings/net/faraday,ftgmac100.yaml24
-rw-r--r--Documentation/devicetree/bindings/net/fsl,cpm-enet.yaml59
-rw-r--r--Documentation/devicetree/bindings/net/fsl,cpm-mdio.yaml55
-rw-r--r--Documentation/devicetree/bindings/net/fsl,enetc-ierb.yaml38
-rw-r--r--Documentation/devicetree/bindings/net/fsl,enetc-mdio.yaml60
-rw-r--r--Documentation/devicetree/bindings/net/fsl,enetc.yaml89
-rw-r--r--Documentation/devicetree/bindings/net/fsl,fec.yaml10
-rw-r--r--Documentation/devicetree/bindings/net/fsl,fman-dtsec.yaml5
-rw-r--r--Documentation/devicetree/bindings/net/fsl,fman-mdio.yaml123
-rw-r--r--Documentation/devicetree/bindings/net/fsl,fman-muram.yaml40
-rw-r--r--Documentation/devicetree/bindings/net/fsl,fman-port.yaml75
-rw-r--r--Documentation/devicetree/bindings/net/fsl,fman.yaml210
-rw-r--r--Documentation/devicetree/bindings/net/fsl,gianfar-mdio.yaml112
-rw-r--r--Documentation/devicetree/bindings/net/fsl,gianfar.yaml246
-rw-r--r--Documentation/devicetree/bindings/net/fsl,qoriq-mc-dpmac.yaml12
-rw-r--r--Documentation/devicetree/bindings/net/fsl-enetc.txt119
-rw-r--r--Documentation/devicetree/bindings/net/fsl-fman.txt548
-rw-r--r--Documentation/devicetree/bindings/net/fsl-tsec-phy.txt82
-rw-r--r--Documentation/devicetree/bindings/net/hisilicon-hip04-net.txt10
-rw-r--r--Documentation/devicetree/bindings/net/ieee802154/at86rf230.txt27
-rw-r--r--Documentation/devicetree/bindings/net/ieee802154/atmel,at86rf233.yaml66
-rw-r--r--Documentation/devicetree/bindings/net/ieee802154/ca8210.txt2
-rw-r--r--Documentation/devicetree/bindings/net/intel,dwmac-plat.yaml6
-rw-r--r--Documentation/devicetree/bindings/net/intel,ixp4xx-ethernet.yaml2
-rw-r--r--Documentation/devicetree/bindings/net/litex,liteeth.yaml12
-rw-r--r--Documentation/devicetree/bindings/net/lpc-eth.txt28
-rw-r--r--Documentation/devicetree/bindings/net/marvell,aquantia.yaml6
-rw-r--r--Documentation/devicetree/bindings/net/marvell,armada-370-neta.yaml79
-rw-r--r--Documentation/devicetree/bindings/net/marvell,armada-380-neta-bm.yaml60
-rw-r--r--Documentation/devicetree/bindings/net/marvell,prestera.yaml4
-rw-r--r--Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt50
-rw-r--r--Documentation/devicetree/bindings/net/marvell-bluetooth.yaml49
-rw-r--r--Documentation/devicetree/bindings/net/marvell-bt-8xxx.txt83
-rw-r--r--Documentation/devicetree/bindings/net/marvell-neta-bm.txt47
-rw-r--r--Documentation/devicetree/bindings/net/maxim,ds26522.txt13
-rw-r--r--Documentation/devicetree/bindings/net/maxim,ds26522.yaml40
-rw-r--r--Documentation/devicetree/bindings/net/mdio-mux-gpio.yaml32
-rw-r--r--Documentation/devicetree/bindings/net/mdio-mux-multiplexer.yaml1
-rw-r--r--Documentation/devicetree/bindings/net/mdio.yaml2
-rw-r--r--Documentation/devicetree/bindings/net/mediatek,net.yaml146
-rw-r--r--Documentation/devicetree/bindings/net/mediatek-bluetooth.txt116
-rw-r--r--Documentation/devicetree/bindings/net/mediatek-dwmac.yaml6
-rw-r--r--Documentation/devicetree/bindings/net/micrel-ks8995.txt20
-rw-r--r--Documentation/devicetree/bindings/net/micrel-ksz90x1.txt4
-rw-r--r--Documentation/devicetree/bindings/net/micrel.txt2
-rw-r--r--Documentation/devicetree/bindings/net/microchip,lan8650.yaml74
-rw-r--r--Documentation/devicetree/bindings/net/microchip,sparx5-switch.yaml61
-rw-r--r--Documentation/devicetree/bindings/net/mscc,miim.yaml10
-rw-r--r--Documentation/devicetree/bindings/net/mscc-phy-vsc8531.txt73
-rw-r--r--Documentation/devicetree/bindings/net/mscc-phy-vsc8531.yaml131
-rw-r--r--Documentation/devicetree/bindings/net/network-class.yaml46
-rw-r--r--Documentation/devicetree/bindings/net/nfc/nxp,nci.yaml1
-rw-r--r--Documentation/devicetree/bindings/net/nfc/ti,trf7970a.yaml9
-rw-r--r--Documentation/devicetree/bindings/net/nxp,dwmac-imx.yaml12
-rw-r--r--Documentation/devicetree/bindings/net/nxp,lpc-eth.yaml48
-rw-r--r--Documentation/devicetree/bindings/net/nxp,lpc1850-dwmac.txt20
-rw-r--r--Documentation/devicetree/bindings/net/nxp,lpc1850-dwmac.yaml85
-rw-r--r--Documentation/devicetree/bindings/net/nxp,netc-blk-ctrl.yaml105
-rw-r--r--Documentation/devicetree/bindings/net/nxp,s32-dwmac.yaml105
-rw-r--r--Documentation/devicetree/bindings/net/nxp,tja11xx.yaml78
-rw-r--r--Documentation/devicetree/bindings/net/pcs/renesas,rzn1-miic.yaml177
-rw-r--r--Documentation/devicetree/bindings/net/pcs/snps,dw-xpcs.yaml136
-rw-r--r--Documentation/devicetree/bindings/net/pse-pd/microchip,pd692x0.yaml192
-rw-r--r--Documentation/devicetree/bindings/net/pse-pd/pse-controller.yaml101
-rw-r--r--Documentation/devicetree/bindings/net/pse-pd/skyworks,si3474.yaml144
-rw-r--r--Documentation/devicetree/bindings/net/pse-pd/ti,tps23881.yaml127
-rw-r--r--Documentation/devicetree/bindings/net/qca,ar803x.yaml43
-rw-r--r--Documentation/devicetree/bindings/net/qca,qca7000.txt87
-rw-r--r--Documentation/devicetree/bindings/net/qca,qca7000.yaml109
-rw-r--r--Documentation/devicetree/bindings/net/qca,qca808x.yaml54
-rw-r--r--Documentation/devicetree/bindings/net/qcom,ethqos.yaml41
-rw-r--r--Documentation/devicetree/bindings/net/qcom,ipa.yaml131
-rw-r--r--Documentation/devicetree/bindings/net/qcom,ipq4019-mdio.yaml17
-rw-r--r--Documentation/devicetree/bindings/net/qcom,ipq9574-ppe.yaml533
-rw-r--r--Documentation/devicetree/bindings/net/qcom,qca807x.yaml184
-rw-r--r--Documentation/devicetree/bindings/net/realtek,rtl82xx.yaml46
-rw-r--r--Documentation/devicetree/bindings/net/realtek,rtl9301-mdio.yaml86
-rw-r--r--Documentation/devicetree/bindings/net/realtek,rtl9301-switch.yaml175
-rw-r--r--Documentation/devicetree/bindings/net/realtek-bluetooth.yaml65
-rw-r--r--Documentation/devicetree/bindings/net/renesas,ether.yaml4
-rw-r--r--Documentation/devicetree/bindings/net/renesas,etheravb.yaml42
-rw-r--r--Documentation/devicetree/bindings/net/renesas,ethertsn.yaml35
-rw-r--r--Documentation/devicetree/bindings/net/renesas,rzn1-gmac.yaml75
-rw-r--r--Documentation/devicetree/bindings/net/renesas,rzv2h-gbeth.yaml303
-rw-r--r--Documentation/devicetree/bindings/net/rfkill-gpio.yaml5
-rw-r--r--Documentation/devicetree/bindings/net/rockchip-dwmac.yaml56
-rw-r--r--Documentation/devicetree/bindings/net/sff,sfp.yaml14
-rw-r--r--Documentation/devicetree/bindings/net/smsc,lan9115.yaml6
-rw-r--r--Documentation/devicetree/bindings/net/snps,dwmac.yaml232
-rw-r--r--Documentation/devicetree/bindings/net/socfpga-dwmac.txt57
-rw-r--r--Documentation/devicetree/bindings/net/socionext,uniphier-ave4.yaml8
-rw-r--r--Documentation/devicetree/bindings/net/sophgo,cv1800b-dwmac.yaml114
-rw-r--r--Documentation/devicetree/bindings/net/sophgo,sg2044-dwmac.yaml151
-rw-r--r--Documentation/devicetree/bindings/net/spacemit,k1-emac.yaml81
-rw-r--r--Documentation/devicetree/bindings/net/starfive,jh7110-dwmac.yaml90
-rw-r--r--Documentation/devicetree/bindings/net/stm32-dwmac.yaml164
-rw-r--r--Documentation/devicetree/bindings/net/tesla,fsd-ethqos.yaml118
-rw-r--r--Documentation/devicetree/bindings/net/thead,th1520-gmac.yaml112
-rw-r--r--Documentation/devicetree/bindings/net/ti,cc1352p7.yaml7
-rw-r--r--Documentation/devicetree/bindings/net/ti,cpsw-switch.yaml5
-rw-r--r--Documentation/devicetree/bindings/net/ti,davinci-mdio.yaml10
-rw-r--r--Documentation/devicetree/bindings/net/ti,dp83822.yaml65
-rw-r--r--Documentation/devicetree/bindings/net/ti,icss-iep.yaml19
-rw-r--r--Documentation/devicetree/bindings/net/ti,icssg-prueth.yaml44
-rw-r--r--Documentation/devicetree/bindings/net/ti,icssm-prueth.yaml233
-rw-r--r--Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml28
-rw-r--r--Documentation/devicetree/bindings/net/ti,k3-am654-cpts.yaml42
-rw-r--r--Documentation/devicetree/bindings/net/ti,pruss-ecap.yaml32
-rw-r--r--Documentation/devicetree/bindings/net/toshiba,visconti-dwmac.yaml6
-rw-r--r--Documentation/devicetree/bindings/net/vertexcom-mse102x.yaml2
-rw-r--r--Documentation/devicetree/bindings/net/via,vt8500-rhine.yaml41
-rw-r--r--Documentation/devicetree/bindings/net/via-rhine.txt17
-rw-r--r--Documentation/devicetree/bindings/net/wireless/brcm,bcm4329-fmac.yaml13
-rw-r--r--Documentation/devicetree/bindings/net/wireless/marvell,sd8787.yaml92
-rw-r--r--Documentation/devicetree/bindings/net/wireless/marvell-8xxx.txt70
-rw-r--r--Documentation/devicetree/bindings/net/wireless/mediatek,mt76.yaml99
-rw-r--r--Documentation/devicetree/bindings/net/wireless/microchip,wilc1000.yaml6
-rw-r--r--Documentation/devicetree/bindings/net/wireless/qca,ath9k.yaml18
-rw-r--r--Documentation/devicetree/bindings/net/wireless/qcom,ath10k.yaml34
-rw-r--r--Documentation/devicetree/bindings/net/wireless/qcom,ath11k-pci.yaml92
-rw-r--r--Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml21
-rw-r--r--Documentation/devicetree/bindings/net/wireless/qcom,ath12k-wsi.yaml210
-rw-r--r--Documentation/devicetree/bindings/net/wireless/qcom,ath12k.yaml104
-rw-r--r--Documentation/devicetree/bindings/net/wireless/qcom,ipq5332-wifi.yaml315
-rw-r--r--Documentation/devicetree/bindings/net/wireless/ralink,rt2880.yaml49
-rw-r--r--Documentation/devicetree/bindings/net/wireless/realtek,rtl8188e.yaml50
-rw-r--r--Documentation/devicetree/bindings/net/wireless/silabs,wfx.yaml5
-rw-r--r--Documentation/devicetree/bindings/net/wireless/ti,wlcore.yaml1
-rw-r--r--Documentation/devicetree/bindings/net/wireless/wireless-controller.yaml23
-rw-r--r--Documentation/devicetree/bindings/net/xlnx,axi-ethernet.yaml5
-rw-r--r--Documentation/devicetree/bindings/net/xlnx,emaclite.yaml5
-rw-r--r--Documentation/devicetree/bindings/net/xlnx,gmii-to-rgmii.yaml5
-rw-r--r--Documentation/devicetree/bindings/npu/arm,ethos.yaml79
-rw-r--r--Documentation/devicetree/bindings/npu/rockchip,rk3588-rknn-core.yaml112
-rw-r--r--Documentation/devicetree/bindings/numa.txt319
-rw-r--r--Documentation/devicetree/bindings/nvme/apple,nvme-ans.yaml30
-rw-r--r--Documentation/devicetree/bindings/nvmem/airoha,an8855-efuse.yaml123
-rw-r--r--Documentation/devicetree/bindings/nvmem/allwinner,sun4i-a10-sid.yaml1
-rw-r--r--Documentation/devicetree/bindings/nvmem/amlogic,meson-gxbb-efuse.yaml3
-rw-r--r--Documentation/devicetree/bindings/nvmem/amlogic,meson6-efuse.yaml2
-rw-r--r--Documentation/devicetree/bindings/nvmem/apple,spmi-nvmem.yaml54
-rw-r--r--Documentation/devicetree/bindings/nvmem/brcm,ocotp.txt17
-rw-r--r--Documentation/devicetree/bindings/nvmem/brcm,ocotp.yaml39
-rw-r--r--Documentation/devicetree/bindings/nvmem/fsl,layerscape-sfp.yaml1
-rw-r--r--Documentation/devicetree/bindings/nvmem/fsl,vf610-ocotp.yaml47
-rw-r--r--Documentation/devicetree/bindings/nvmem/imx-iim.yaml4
-rw-r--r--Documentation/devicetree/bindings/nvmem/imx-ocotp.yaml9
-rw-r--r--Documentation/devicetree/bindings/nvmem/layouts/fixed-cell.yaml24
-rw-r--r--Documentation/devicetree/bindings/nvmem/layouts/fixed-layout.yaml2
-rw-r--r--Documentation/devicetree/bindings/nvmem/layouts/kontron,sl28-vpd.yaml7
-rw-r--r--Documentation/devicetree/bindings/nvmem/layouts/nvmem-layout.yaml1
-rw-r--r--Documentation/devicetree/bindings/nvmem/layouts/u-boot,env.yaml139
-rw-r--r--Documentation/devicetree/bindings/nvmem/lpc1857-eeprom.txt28
-rw-r--r--Documentation/devicetree/bindings/nvmem/maxim,max77759-nvmem.yaml32
-rw-r--r--Documentation/devicetree/bindings/nvmem/mediatek,efuse.yaml22
-rw-r--r--Documentation/devicetree/bindings/nvmem/mxs-ocotp.yaml4
-rw-r--r--Documentation/devicetree/bindings/nvmem/nvmem-provider.yaml18
-rw-r--r--Documentation/devicetree/bindings/nvmem/nxp,lpc1857-eeprom.yaml61
-rw-r--r--Documentation/devicetree/bindings/nvmem/nxp,s32g-ocotp-nvmem.yaml45
-rw-r--r--Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml14
-rw-r--r--Documentation/devicetree/bindings/nvmem/qcom,spmi-sdam.yaml2
-rw-r--r--Documentation/devicetree/bindings/nvmem/renesas,rcar-efuse.yaml68
-rw-r--r--Documentation/devicetree/bindings/nvmem/renesas,rcar-otp.yaml43
-rw-r--r--Documentation/devicetree/bindings/nvmem/rmem.yaml1
-rw-r--r--Documentation/devicetree/bindings/nvmem/rockchip,otp.yaml25
-rw-r--r--Documentation/devicetree/bindings/nvmem/sc27xx-efuse.txt52
-rw-r--r--Documentation/devicetree/bindings/nvmem/sprd,sc2731-efuse.yaml39
-rw-r--r--Documentation/devicetree/bindings/nvmem/sprd,ums312-efuse.yaml61
-rw-r--r--Documentation/devicetree/bindings/nvmem/sprd-efuse.txt39
-rw-r--r--Documentation/devicetree/bindings/nvmem/st,stm32-romem.yaml5
-rw-r--r--Documentation/devicetree/bindings/nvmem/u-boot,env.yaml103
-rw-r--r--Documentation/devicetree/bindings/nvmem/vf610-ocotp.txt19
-rw-r--r--Documentation/devicetree/bindings/nvmem/xlnx,zynqmp-nvmem.txt46
-rw-r--r--Documentation/devicetree/bindings/nvmem/xlnx,zynqmp-nvmem.yaml42
-rw-r--r--Documentation/devicetree/bindings/nvmem/zii,rave-sp-eeprom.txt40
-rw-r--r--Documentation/devicetree/bindings/nvmem/zii,rave-sp-eeprom.yaml54
-rw-r--r--Documentation/devicetree/bindings/opp/allwinner,sun50i-h6-operating-points.yaml88
-rw-r--r--Documentation/devicetree/bindings/opp/operating-points-v2-ti-cpu.yaml22
-rw-r--r--Documentation/devicetree/bindings/opp/opp-v1.yaml18
-rw-r--r--Documentation/devicetree/bindings/opp/opp-v2-base.yaml2
-rw-r--r--Documentation/devicetree/bindings/opp/opp-v2-qcom-adreno.yaml96
-rw-r--r--Documentation/devicetree/bindings/pci/83xx-512x-pci.txt39
-rw-r--r--Documentation/devicetree/bindings/pci/aardvark-pci.txt59
-rw-r--r--Documentation/devicetree/bindings/pci/altera-pcie-msi.txt27
-rw-r--r--Documentation/devicetree/bindings/pci/altera-pcie.txt50
-rw-r--r--Documentation/devicetree/bindings/pci/altr,pcie-root-port.yaml123
-rw-r--r--Documentation/devicetree/bindings/pci/amazon,al-alpine-v3-pcie.yaml71
-rw-r--r--Documentation/devicetree/bindings/pci/amd,versal2-mdb-host.yaml143
-rw-r--r--Documentation/devicetree/bindings/pci/amlogic,axg-pcie.yaml25
-rw-r--r--Documentation/devicetree/bindings/pci/apm,xgene-pcie.yaml84
-rw-r--r--Documentation/devicetree/bindings/pci/apple,pcie.yaml35
-rw-r--r--Documentation/devicetree/bindings/pci/axis,artpec6-pcie.txt50
-rw-r--r--Documentation/devicetree/bindings/pci/axis,artpec6-pcie.yaml118
-rw-r--r--Documentation/devicetree/bindings/pci/brcm,iproc-pcie.yaml3
-rw-r--r--Documentation/devicetree/bindings/pci/brcm,stb-pcie.yaml140
-rw-r--r--Documentation/devicetree/bindings/pci/cdns,cdns-pcie-ep.yaml16
-rw-r--r--Documentation/devicetree/bindings/pci/cdns,cdns-pcie-host.yaml3
-rw-r--r--Documentation/devicetree/bindings/pci/cdns-pcie-host.yaml2
-rw-r--r--Documentation/devicetree/bindings/pci/cix,sky1-pcie-host.yaml83
-rw-r--r--Documentation/devicetree/bindings/pci/faraday,ftpci100.yaml2
-rw-r--r--Documentation/devicetree/bindings/pci/fsl,imx6q-pcie-common.yaml21
-rw-r--r--Documentation/devicetree/bindings/pci/fsl,imx6q-pcie-ep.yaml94
-rw-r--r--Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.yaml92
-rw-r--r--Documentation/devicetree/bindings/pci/fsl,layerscape-pcie-ep.yaml99
-rw-r--r--Documentation/devicetree/bindings/pci/fsl,layerscape-pcie.yaml182
-rw-r--r--Documentation/devicetree/bindings/pci/fsl,mpc8xxx-pci.yaml113
-rw-r--r--Documentation/devicetree/bindings/pci/fsl,pci.txt27
-rw-r--r--Documentation/devicetree/bindings/pci/hisilicon,kirin-pcie.yaml3
-rw-r--r--Documentation/devicetree/bindings/pci/host-generic-pci.yaml10
-rw-r--r--Documentation/devicetree/bindings/pci/intel,ixp4xx-pci.yaml2
-rw-r--r--Documentation/devicetree/bindings/pci/intel,keembay-pcie-ep.yaml26
-rw-r--r--Documentation/devicetree/bindings/pci/intel,keembay-pcie.yaml40
-rw-r--r--Documentation/devicetree/bindings/pci/layerscape-pci.txt79
-rw-r--r--Documentation/devicetree/bindings/pci/layerscape-pcie-gen4.txt52
-rw-r--r--Documentation/devicetree/bindings/pci/loongson.yaml3
-rw-r--r--Documentation/devicetree/bindings/pci/marvell,armada-3700-pcie.yaml103
-rw-r--r--Documentation/devicetree/bindings/pci/marvell,armada8k-pcie.yaml100
-rw-r--r--Documentation/devicetree/bindings/pci/marvell,kirkwood-pcie.yaml280
-rw-r--r--Documentation/devicetree/bindings/pci/mbvl,gpex40-pcie.yaml173
-rw-r--r--Documentation/devicetree/bindings/pci/mediatek,mt7621-pcie.yaml36
-rw-r--r--Documentation/devicetree/bindings/pci/mediatek-pcie-gen3.yaml123
-rw-r--r--Documentation/devicetree/bindings/pci/mediatek-pcie-mt7623.yaml164
-rw-r--r--Documentation/devicetree/bindings/pci/mediatek-pcie.txt289
-rw-r--r--Documentation/devicetree/bindings/pci/mediatek-pcie.yaml438
-rw-r--r--Documentation/devicetree/bindings/pci/microchip,pcie-host.yaml109
-rw-r--r--Documentation/devicetree/bindings/pci/mobiveil-pcie.txt72
-rw-r--r--Documentation/devicetree/bindings/pci/mvebu-pci.txt310
-rw-r--r--Documentation/devicetree/bindings/pci/nvidia,tegra194-pcie-ep.yaml2
-rw-r--r--Documentation/devicetree/bindings/pci/nxp,s32g-pcie.yaml130
-rw-r--r--Documentation/devicetree/bindings/pci/pci-armada8k.txt48
-rw-r--r--Documentation/devicetree/bindings/pci/pci-ep-bus.yaml58
-rw-r--r--Documentation/devicetree/bindings/pci/pci-ep.yaml84
-rw-r--r--Documentation/devicetree/bindings/pci/pci-iommu.txt171
-rw-r--r--Documentation/devicetree/bindings/pci/pci-msi.txt220
-rw-r--r--Documentation/devicetree/bindings/pci/pci.txt84
-rw-r--r--Documentation/devicetree/bindings/pci/pcie-al.txt46
-rw-r--r--Documentation/devicetree/bindings/pci/plda,xpressrich3-axi-common.yaml81
-rw-r--r--Documentation/devicetree/bindings/pci/qcom,pcie-common.yaml135
-rw-r--r--Documentation/devicetree/bindings/pci/qcom,pcie-ep.yaml119
-rw-r--r--Documentation/devicetree/bindings/pci/qcom,pcie-sa8255p.yaml122
-rw-r--r--Documentation/devicetree/bindings/pci/qcom,pcie-sa8775p.yaml183
-rw-r--r--Documentation/devicetree/bindings/pci/qcom,pcie-sc7280.yaml195
-rw-r--r--Documentation/devicetree/bindings/pci/qcom,pcie-sc8180x.yaml168
-rw-r--r--Documentation/devicetree/bindings/pci/qcom,pcie-sc8280xp.yaml180
-rw-r--r--Documentation/devicetree/bindings/pci/qcom,pcie-sm8150.yaml165
-rw-r--r--Documentation/devicetree/bindings/pci/qcom,pcie-sm8250.yaml181
-rw-r--r--Documentation/devicetree/bindings/pci/qcom,pcie-sm8350.yaml170
-rw-r--r--Documentation/devicetree/bindings/pci/qcom,pcie-sm8450.yaml185
-rw-r--r--Documentation/devicetree/bindings/pci/qcom,pcie-sm8550.yaml183
-rw-r--r--Documentation/devicetree/bindings/pci/qcom,pcie-x1e80100.yaml173
-rw-r--r--Documentation/devicetree/bindings/pci/qcom,pcie.yaml492
-rw-r--r--Documentation/devicetree/bindings/pci/rcar-gen4-pci-ep.yaml5
-rw-r--r--Documentation/devicetree/bindings/pci/rcar-gen4-pci-host.yaml5
-rw-r--r--Documentation/devicetree/bindings/pci/rcar-pci-ep.yaml34
-rw-r--r--Documentation/devicetree/bindings/pci/rcar-pci-host.yaml51
-rw-r--r--Documentation/devicetree/bindings/pci/renesas,pci-rcar-gen2.yaml10
-rw-r--r--Documentation/devicetree/bindings/pci/renesas,r9a08g045-pcie.yaml249
-rw-r--r--Documentation/devicetree/bindings/pci/rockchip,rk3399-pcie.yaml3
-rw-r--r--Documentation/devicetree/bindings/pci/rockchip-dw-pcie-common.yaml132
-rw-r--r--Documentation/devicetree/bindings/pci/rockchip-dw-pcie-ep.yaml95
-rw-r--r--Documentation/devicetree/bindings/pci/rockchip-dw-pcie.yaml153
-rw-r--r--Documentation/devicetree/bindings/pci/sifive,fu740-pcie.yaml2
-rw-r--r--Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml7
-rw-r--r--Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml13
-rw-r--r--Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml11
-rw-r--r--Documentation/devicetree/bindings/pci/socionext,uniphier-pcie-ep.yaml8
-rw-r--r--Documentation/devicetree/bindings/pci/socionext,uniphier-pcie.yaml4
-rw-r--r--Documentation/devicetree/bindings/pci/sophgo,sg2042-pcie-host.yaml64
-rw-r--r--Documentation/devicetree/bindings/pci/sophgo,sg2044-pcie.yaml122
-rw-r--r--Documentation/devicetree/bindings/pci/spacemit,k1-pcie-host.yaml157
-rw-r--r--Documentation/devicetree/bindings/pci/spear13xx-pcie.txt14
-rw-r--r--Documentation/devicetree/bindings/pci/st,spear1340-pcie.yaml45
-rw-r--r--Documentation/devicetree/bindings/pci/st,stm32-pcie-common.yaml33
-rw-r--r--Documentation/devicetree/bindings/pci/st,stm32-pcie-ep.yaml73
-rw-r--r--Documentation/devicetree/bindings/pci/st,stm32-pcie-host.yaml112
-rw-r--r--Documentation/devicetree/bindings/pci/starfive,jh7110-pcie.yaml126
-rw-r--r--Documentation/devicetree/bindings/pci/ti,am65-pci-host.yaml50
-rw-r--r--Documentation/devicetree/bindings/pci/ti,j721e-pci-ep.yaml34
-rw-r--r--Documentation/devicetree/bindings/pci/ti,j721e-pci-host.yaml18
-rw-r--r--Documentation/devicetree/bindings/pci/toshiba,tc9563.yaml179
-rw-r--r--Documentation/devicetree/bindings/pci/v3,v360epc-pci.yaml100
-rw-r--r--Documentation/devicetree/bindings/pci/v3-v360epc-pci.txt76
-rw-r--r--Documentation/devicetree/bindings/pci/versatile.yaml3
-rw-r--r--Documentation/devicetree/bindings/pci/xgene-pci-msi.txt68
-rw-r--r--Documentation/devicetree/bindings/pci/xgene-pci.txt50
-rw-r--r--Documentation/devicetree/bindings/pci/xilinx-versal-cpm.yaml127
-rw-r--r--Documentation/devicetree/bindings/pci/xlnx,axi-pcie-host.yaml2
-rw-r--r--Documentation/devicetree/bindings/pci/xlnx,nwl-pcie.yaml11
-rw-r--r--Documentation/devicetree/bindings/pci/xlnx,xdma-host.yaml38
-rw-r--r--Documentation/devicetree/bindings/perf/apm,xgene-pmu.yaml142
-rw-r--r--Documentation/devicetree/bindings/perf/apm-xgene-pmu.txt112
-rw-r--r--Documentation/devicetree/bindings/perf/arm,cmn.yaml1
-rw-r--r--Documentation/devicetree/bindings/perf/arm,coresight-pmu.yaml39
-rw-r--r--Documentation/devicetree/bindings/perf/arm,ni.yaml30
-rw-r--r--Documentation/devicetree/bindings/perf/fsl-imx-ddr.yaml35
-rw-r--r--Documentation/devicetree/bindings/perf/starfive,jh8100-starlink-pmu.yaml46
-rw-r--r--Documentation/devicetree/bindings/phy/airoha,en7581-pcie-phy.yaml69
-rw-r--r--Documentation/devicetree/bindings/phy/allwinner,sun50i-a64-usb-phy.yaml12
-rw-r--r--Documentation/devicetree/bindings/phy/amlogic,g12a-usb2-phy.yaml3
-rw-r--r--Documentation/devicetree/bindings/phy/apm,xgene-phy.yaml169
-rw-r--r--Documentation/devicetree/bindings/phy/apm-xgene-phy.txt76
-rw-r--r--Documentation/devicetree/bindings/phy/bcm-ns-usb2-phy.yaml19
-rw-r--r--Documentation/devicetree/bindings/phy/berlin-sata-phy.txt36
-rw-r--r--Documentation/devicetree/bindings/phy/berlin-usb-phy.txt16
-rw-r--r--Documentation/devicetree/bindings/phy/brcm,brcmstb-usb-phy.yaml5
-rw-r--r--Documentation/devicetree/bindings/phy/brcm,ns2-drd-phy.txt30
-rw-r--r--Documentation/devicetree/bindings/phy/brcm,ns2-drd-phy.yaml62
-rw-r--r--Documentation/devicetree/bindings/phy/brcm,sata-phy.yaml8
-rw-r--r--Documentation/devicetree/bindings/phy/brcm,sr-pcie-phy.txt41
-rw-r--r--Documentation/devicetree/bindings/phy/brcm,sr-pcie-phy.yaml46
-rw-r--r--Documentation/devicetree/bindings/phy/brcm,sr-usb-combo-phy.yaml65
-rw-r--r--Documentation/devicetree/bindings/phy/brcm,stingray-usb-phy.txt32
-rw-r--r--Documentation/devicetree/bindings/phy/dm816x-phy.txt24
-rw-r--r--Documentation/devicetree/bindings/phy/fsl,imx8mp-hdmi-phy.yaml62
-rw-r--r--Documentation/devicetree/bindings/phy/fsl,imx8mq-usb-phy.yaml86
-rw-r--r--Documentation/devicetree/bindings/phy/fsl,imx8qm-hsio.yaml164
-rw-r--r--Documentation/devicetree/bindings/phy/fsl,mxs-usbphy.yaml18
-rw-r--r--Documentation/devicetree/bindings/phy/hisilicon,hi3798cv200-combphy.yaml56
-rw-r--r--Documentation/devicetree/bindings/phy/hisilicon,hi6220-usb-phy.yaml35
-rw-r--r--Documentation/devicetree/bindings/phy/hisilicon,hix5hd2-sata-phy.yaml48
-rw-r--r--Documentation/devicetree/bindings/phy/hisilicon,inno-usb2-phy.yaml93
-rw-r--r--Documentation/devicetree/bindings/phy/hix5hd2-phy.txt22
-rw-r--r--Documentation/devicetree/bindings/phy/img,pistachio-usb-phy.yaml62
-rw-r--r--Documentation/devicetree/bindings/phy/keystone-usb-phy.txt19
-rw-r--r--Documentation/devicetree/bindings/phy/lantiq,ase-usb2-phy.yaml71
-rw-r--r--Documentation/devicetree/bindings/phy/marvell,armada-375-usb-cluster.yaml40
-rw-r--r--Documentation/devicetree/bindings/phy/marvell,armada-380-comphy.yaml83
-rw-r--r--Documentation/devicetree/bindings/phy/marvell,armada-cp110-utmi-phy.yaml6
-rw-r--r--Documentation/devicetree/bindings/phy/marvell,berlin2-sata-phy.yaml76
-rw-r--r--Documentation/devicetree/bindings/phy/marvell,berlin2-usb-phy.yaml42
-rw-r--r--Documentation/devicetree/bindings/phy/marvell,comphy-cp110.yaml167
-rw-r--r--Documentation/devicetree/bindings/phy/marvell,mmp2-usb-phy.yaml37
-rw-r--r--Documentation/devicetree/bindings/phy/marvell,mvebu-sata-phy.yaml47
-rw-r--r--Documentation/devicetree/bindings/phy/mediatek,dsi-phy.yaml1
-rw-r--r--Documentation/devicetree/bindings/phy/mediatek,mt7988-xfi-tphy.yaml80
-rw-r--r--Documentation/devicetree/bindings/phy/mediatek,mt8365-csi-rx.yaml79
-rw-r--r--Documentation/devicetree/bindings/phy/mediatek,tphy.yaml14
-rw-r--r--Documentation/devicetree/bindings/phy/mediatek,ufs-phy.yaml3
-rw-r--r--Documentation/devicetree/bindings/phy/mediatek,xsphy.yaml16
-rw-r--r--Documentation/devicetree/bindings/phy/microchip,sparx5-serdes.yaml17
-rw-r--r--Documentation/devicetree/bindings/phy/mixel,mipi-dsi-phy.yaml5
-rw-r--r--Documentation/devicetree/bindings/phy/motorola,cpcap-usb-phy.yaml107
-rw-r--r--Documentation/devicetree/bindings/phy/motorola,mapphone-mdm6600.yaml81
-rw-r--r--Documentation/devicetree/bindings/phy/nuvoton,ma35d1-usb2-phy.yaml45
-rw-r--r--Documentation/devicetree/bindings/phy/nxp,ptn3222.yaml55
-rw-r--r--Documentation/devicetree/bindings/phy/phy-armada38x-comphy.txt48
-rw-r--r--Documentation/devicetree/bindings/phy/phy-ath79-usb.txt18
-rw-r--r--Documentation/devicetree/bindings/phy/phy-cadence-sierra.yaml2
-rw-r--r--Documentation/devicetree/bindings/phy/phy-cadence-torrent.yaml14
-rw-r--r--Documentation/devicetree/bindings/phy/phy-cpcap-usb.txt40
-rw-r--r--Documentation/devicetree/bindings/phy/phy-da8xx-usb.txt40
-rw-r--r--Documentation/devicetree/bindings/phy/phy-hi3798cv200-combphy.txt59
-rw-r--r--Documentation/devicetree/bindings/phy/phy-hi6220-usb.txt16
-rw-r--r--Documentation/devicetree/bindings/phy/phy-hisi-inno-usb2.txt71
-rw-r--r--Documentation/devicetree/bindings/phy/phy-lantiq-rcu-usb2.txt40
-rw-r--r--Documentation/devicetree/bindings/phy/phy-lpc18xx-usb-otg.txt26
-rw-r--r--Documentation/devicetree/bindings/phy/phy-mapphone-mdm6600.txt29
-rw-r--r--Documentation/devicetree/bindings/phy/phy-mvebu-comphy.txt94
-rw-r--r--Documentation/devicetree/bindings/phy/phy-mvebu.txt42
-rw-r--r--Documentation/devicetree/bindings/phy/phy-pxa-usb.txt18
-rw-r--r--Documentation/devicetree/bindings/phy/phy-rockchip-inno-hdmi.txt43
-rw-r--r--Documentation/devicetree/bindings/phy/phy-rockchip-naneng-combphy.yaml13
-rw-r--r--Documentation/devicetree/bindings/phy/phy-rockchip-typec.txt84
-rw-r--r--Documentation/devicetree/bindings/phy/phy-rockchip-usbdp.yaml152
-rw-r--r--Documentation/devicetree/bindings/phy/phy-stm32-usbphyc.yaml4
-rw-r--r--Documentation/devicetree/bindings/phy/pistachio-usb-phy.txt29
-rw-r--r--Documentation/devicetree/bindings/phy/qca,ar7100-usb-phy.yaml49
-rw-r--r--Documentation/devicetree/bindings/phy/qcom,edp-phy.yaml17
-rw-r--r--Documentation/devicetree/bindings/phy/qcom,hdmi-phy-qmp.yaml1
-rw-r--r--Documentation/devicetree/bindings/phy/qcom,ipq5332-uniphy-pcie-phy.yaml109
-rw-r--r--Documentation/devicetree/bindings/phy/qcom,ipq8074-qmp-pcie-phy.yaml19
-rw-r--r--Documentation/devicetree/bindings/phy/qcom,m31-eusb2-phy.yaml79
-rw-r--r--Documentation/devicetree/bindings/phy/qcom,msm8998-qmp-usb3-phy.yaml186
-rw-r--r--Documentation/devicetree/bindings/phy/qcom,qusb2-phy.yaml2
-rw-r--r--Documentation/devicetree/bindings/phy/qcom,sa8775p-dwmac-sgmii-phy.yaml7
-rw-r--r--Documentation/devicetree/bindings/phy/qcom,sata-phy.yaml55
-rw-r--r--Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-pcie-phy.yaml54
-rw-r--r--Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-ufs-phy.yaml118
-rw-r--r--Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-usb3-uni-phy.yaml32
-rw-r--r--Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-usb43dp-phy.yaml80
-rw-r--r--Documentation/devicetree/bindings/phy/qcom,snps-eusb2-phy.yaml2
-rw-r--r--Documentation/devicetree/bindings/phy/qcom,snps-eusb2-repeater.yaml16
-rw-r--r--Documentation/devicetree/bindings/phy/qcom,usb-8x16-phy.txt76
-rw-r--r--Documentation/devicetree/bindings/phy/qcom,usb-hs-phy.yaml2
-rw-r--r--Documentation/devicetree/bindings/phy/qcom,usb-snps-femto-v2.yaml6
-rw-r--r--Documentation/devicetree/bindings/phy/qcom-apq8064-sata-phy.txt24
-rw-r--r--Documentation/devicetree/bindings/phy/qcom-ipq806x-sata-phy.txt23
-rw-r--r--Documentation/devicetree/bindings/phy/renesas,rzg3e-usb3-phy.yaml63
-rw-r--r--Documentation/devicetree/bindings/phy/renesas,usb2-phy.yaml34
-rw-r--r--Documentation/devicetree/bindings/phy/rockchip,inno-usb2phy.yaml49
-rw-r--r--Documentation/devicetree/bindings/phy/rockchip,pcie3-phy.yaml13
-rw-r--r--Documentation/devicetree/bindings/phy/rockchip,px30-dsi-dphy.yaml1
-rw-r--r--Documentation/devicetree/bindings/phy/rockchip,rk3228-hdmi-phy.yaml97
-rw-r--r--Documentation/devicetree/bindings/phy/rockchip,rk3399-emmc-phy.yaml64
-rw-r--r--Documentation/devicetree/bindings/phy/rockchip,rk3399-pcie-phy.yaml45
-rw-r--r--Documentation/devicetree/bindings/phy/rockchip,rk3399-typec-phy.yaml116
-rw-r--r--Documentation/devicetree/bindings/phy/rockchip,rk3588-hdptx-phy.yaml120
-rw-r--r--Documentation/devicetree/bindings/phy/rockchip,rk3588-mipi-dcphy.yaml87
-rw-r--r--Documentation/devicetree/bindings/phy/rockchip-emmc-phy.txt43
-rw-r--r--Documentation/devicetree/bindings/phy/rockchip-inno-csi-dphy.yaml65
-rw-r--r--Documentation/devicetree/bindings/phy/rockchip-pcie-phy.txt36
-rw-r--r--Documentation/devicetree/bindings/phy/samsung,exynos2200-eusb2-phy.yaml80
-rw-r--r--Documentation/devicetree/bindings/phy/samsung,mipi-video-phy.yaml29
-rw-r--r--Documentation/devicetree/bindings/phy/samsung,ufs-phy.yaml2
-rw-r--r--Documentation/devicetree/bindings/phy/samsung,usb3-drd-phy.yaml142
-rw-r--r--Documentation/devicetree/bindings/phy/socionext,uniphier-ahci-phy.yaml8
-rw-r--r--Documentation/devicetree/bindings/phy/socionext,uniphier-pcie-phy.yaml8
-rw-r--r--Documentation/devicetree/bindings/phy/socionext,uniphier-usb3hs-phy.yaml7
-rw-r--r--Documentation/devicetree/bindings/phy/socionext,uniphier-usb3ss-phy.yaml7
-rw-r--r--Documentation/devicetree/bindings/phy/sophgo,cv1800b-usb2-phy.yaml54
-rw-r--r--Documentation/devicetree/bindings/phy/st,spear1310-miphy.yaml53
-rw-r--r--Documentation/devicetree/bindings/phy/st,stm32mp25-combophy.yaml119
-rw-r--r--Documentation/devicetree/bindings/phy/st-spear-miphy.txt15
-rw-r--r--Documentation/devicetree/bindings/phy/starfive,jh7110-dphy-tx.yaml68
-rw-r--r--Documentation/devicetree/bindings/phy/ti,da830-usb-phy.yaml53
-rw-r--r--Documentation/devicetree/bindings/phy/ti,dm8168-usb-phy.yaml58
-rw-r--r--Documentation/devicetree/bindings/phy/ti,keystone-usbphy.yaml37
-rw-r--r--Documentation/devicetree/bindings/phy/ti,tcan104x-can.yaml83
-rw-r--r--Documentation/devicetree/bindings/pinctrl/actions,s700-pinctrl.txt170
-rw-r--r--Documentation/devicetree/bindings/pinctrl/actions,s700-pinctrl.yaml204
-rw-r--r--Documentation/devicetree/bindings/pinctrl/actions,s900-pinctrl.txt204
-rw-r--r--Documentation/devicetree/bindings/pinctrl/actions,s900-pinctrl.yaml219
-rw-r--r--Documentation/devicetree/bindings/pinctrl/airoha,an7583-pinctrl.yaml402
-rw-r--r--Documentation/devicetree/bindings/pinctrl/airoha,en7581-pinctrl.yaml403
-rw-r--r--Documentation/devicetree/bindings/pinctrl/allwinner,sun4i-a10-pinctrl.yaml13
-rw-r--r--Documentation/devicetree/bindings/pinctrl/allwinner,sun55i-a523-pinctrl.yaml175
-rw-r--r--Documentation/devicetree/bindings/pinctrl/amlogic,meson-pinctrl-a1.yaml2
-rw-r--r--Documentation/devicetree/bindings/pinctrl/amlogic,meson-pinctrl-g12a-aobus.yaml2
-rw-r--r--Documentation/devicetree/bindings/pinctrl/amlogic,meson-pinctrl-g12a-periphs.yaml2
-rw-r--r--Documentation/devicetree/bindings/pinctrl/amlogic,meson8-pinctrl-aobus.yaml2
-rw-r--r--Documentation/devicetree/bindings/pinctrl/amlogic,meson8-pinctrl-cbus.yaml4
-rw-r--r--Documentation/devicetree/bindings/pinctrl/amlogic,pinctrl-a4.yaml139
-rw-r--r--Documentation/devicetree/bindings/pinctrl/apple,pinctrl.yaml22
-rw-r--r--Documentation/devicetree/bindings/pinctrl/aspeed,ast2400-pinctrl.yaml169
-rw-r--r--Documentation/devicetree/bindings/pinctrl/aspeed,ast2500-pinctrl.yaml188
-rw-r--r--Documentation/devicetree/bindings/pinctrl/aspeed,ast2600-pinctrl.yaml516
-rw-r--r--Documentation/devicetree/bindings/pinctrl/atmel,at91-pinctrl.txt176
-rw-r--r--Documentation/devicetree/bindings/pinctrl/atmel,at91-pio4-pinctrl.txt1
-rw-r--r--Documentation/devicetree/bindings/pinctrl/atmel,at91rm9200-pinctrl.yaml184
-rw-r--r--Documentation/devicetree/bindings/pinctrl/awinic,aw9523-pinctrl.yaml139
-rw-r--r--Documentation/devicetree/bindings/pinctrl/berlin,pinctrl.txt47
-rw-r--r--Documentation/devicetree/bindings/pinctrl/bitmain,bm1880-pinctrl.txt126
-rw-r--r--Documentation/devicetree/bindings/pinctrl/bitmain,bm1880-pinctrl.yaml132
-rw-r--r--Documentation/devicetree/bindings/pinctrl/brcm,bcm21664-pinctrl.yaml151
-rw-r--r--Documentation/devicetree/bindings/pinctrl/brcm,bcm2712c0-pinctrl.yaml137
-rw-r--r--Documentation/devicetree/bindings/pinctrl/brcm,bcm2835-gpio.txt99
-rw-r--r--Documentation/devicetree/bindings/pinctrl/brcm,bcm2835-gpio.yaml120
-rw-r--r--Documentation/devicetree/bindings/pinctrl/brcm,iproc-gpio.txt123
-rw-r--r--Documentation/devicetree/bindings/pinctrl/brcm,iproc-gpio.yaml111
-rw-r--r--Documentation/devicetree/bindings/pinctrl/brcm,ns2-pinmux.txt102
-rw-r--r--Documentation/devicetree/bindings/pinctrl/brcm,ns2-pinmux.yaml111
-rw-r--r--Documentation/devicetree/bindings/pinctrl/canaan,k230-pinctrl.yaml127
-rw-r--r--Documentation/devicetree/bindings/pinctrl/cirrus,madera.yaml3
-rw-r--r--Documentation/devicetree/bindings/pinctrl/cix,sky1-pinctrl.yaml91
-rw-r--r--Documentation/devicetree/bindings/pinctrl/cypress,cy8c95x0.yaml24
-rw-r--r--Documentation/devicetree/bindings/pinctrl/eswin,eic7700-pinctrl.yaml156
-rw-r--r--Documentation/devicetree/bindings/pinctrl/fsl,imx35-pinctrl.txt33
-rw-r--r--Documentation/devicetree/bindings/pinctrl/fsl,imx35-pinctrl.yaml184
-rw-r--r--Documentation/devicetree/bindings/pinctrl/fsl,imx50-pinctrl.txt32
-rw-r--r--Documentation/devicetree/bindings/pinctrl/fsl,imx51-pinctrl.txt32
-rw-r--r--Documentation/devicetree/bindings/pinctrl/fsl,imx53-pinctrl.txt32
-rw-r--r--Documentation/devicetree/bindings/pinctrl/fsl,imx6dl-pinctrl.txt38
-rw-r--r--Documentation/devicetree/bindings/pinctrl/fsl,imx6q-pinctrl.txt38
-rw-r--r--Documentation/devicetree/bindings/pinctrl/fsl,imx6sl-pinctrl.txt39
-rw-r--r--Documentation/devicetree/bindings/pinctrl/fsl,imx6sll-pinctrl.txt40
-rw-r--r--Documentation/devicetree/bindings/pinctrl/fsl,imx6sx-pinctrl.txt36
-rw-r--r--Documentation/devicetree/bindings/pinctrl/fsl,imx6ul-pinctrl.txt37
-rw-r--r--Documentation/devicetree/bindings/pinctrl/fsl,imx7ulp-iomuxc1.yaml99
-rw-r--r--Documentation/devicetree/bindings/pinctrl/fsl,imx7ulp-pinctrl.txt53
-rw-r--r--Documentation/devicetree/bindings/pinctrl/fsl,imx9-pinctrl.yaml86
-rw-r--r--Documentation/devicetree/bindings/pinctrl/fsl,imx93-pinctrl.yaml85
-rw-r--r--Documentation/devicetree/bindings/pinctrl/fsl,mxs-pinctrl.txt127
-rw-r--r--Documentation/devicetree/bindings/pinctrl/fsl,vf610-iomuxc.yaml83
-rw-r--r--Documentation/devicetree/bindings/pinctrl/fsl,vf610-pinctrl.txt41
-rw-r--r--Documentation/devicetree/bindings/pinctrl/ingenic,pinctrl.yaml2
-rw-r--r--Documentation/devicetree/bindings/pinctrl/marvell,ap806-pinctrl.yaml61
-rw-r--r--Documentation/devicetree/bindings/pinctrl/marvell,armada-37xx-pinctrl.txt195
-rw-r--r--Documentation/devicetree/bindings/pinctrl/marvell,armada-7k-pinctrl.yaml72
-rw-r--r--Documentation/devicetree/bindings/pinctrl/marvell,armada3710-xb-pinctrl.yaml124
-rw-r--r--Documentation/devicetree/bindings/pinctrl/marvell,berlin2-soc-pinctrl.yaml86
-rw-r--r--Documentation/devicetree/bindings/pinctrl/mediatek,mt65xx-pinctrl.yaml97
-rw-r--r--Documentation/devicetree/bindings/pinctrl/mediatek,mt6779-pinctrl.yaml5
-rw-r--r--Documentation/devicetree/bindings/pinctrl/mediatek,mt6878-pinctrl.yaml211
-rw-r--r--Documentation/devicetree/bindings/pinctrl/mediatek,mt6893-pinctrl.yaml193
-rw-r--r--Documentation/devicetree/bindings/pinctrl/mediatek,mt7622-pinctrl.yaml173
-rw-r--r--Documentation/devicetree/bindings/pinctrl/mediatek,mt7988-pinctrl.yaml580
-rw-r--r--Documentation/devicetree/bindings/pinctrl/mediatek,mt8183-pinctrl.yaml72
-rw-r--r--Documentation/devicetree/bindings/pinctrl/mediatek,mt8189-pinctrl.yaml213
-rw-r--r--Documentation/devicetree/bindings/pinctrl/mediatek,mt8192-pinctrl.yaml78
-rw-r--r--Documentation/devicetree/bindings/pinctrl/mediatek,mt8196-pinctrl.yaml236
-rw-r--r--Documentation/devicetree/bindings/pinctrl/microchip,mcp23s08.yaml161
-rw-r--r--Documentation/devicetree/bindings/pinctrl/microchip,mpfs-pinctrl-iomux0.yaml89
-rw-r--r--Documentation/devicetree/bindings/pinctrl/microchip,pic64gx-pinctrl-gpio2.yaml74
-rw-r--r--Documentation/devicetree/bindings/pinctrl/microchip,sparx5-sgpio.yaml12
-rw-r--r--Documentation/devicetree/bindings/pinctrl/mscc,ocelot-pinctrl.yaml27
-rw-r--r--Documentation/devicetree/bindings/pinctrl/nuvoton,ma35d1-pinctrl.yaml178
-rw-r--r--Documentation/devicetree/bindings/pinctrl/nuvoton,npcm845-pinctrl.yaml70
-rw-r--r--Documentation/devicetree/bindings/pinctrl/nuvoton,wpcm450-pinctrl.yaml3
-rw-r--r--Documentation/devicetree/bindings/pinctrl/nvidia,tegra186-pinmux.yaml285
-rw-r--r--Documentation/devicetree/bindings/pinctrl/nvidia,tegra234-pinmux-aon.yaml7
-rw-r--r--Documentation/devicetree/bindings/pinctrl/nvidia,tegra234-pinmux-common.yaml84
-rw-r--r--Documentation/devicetree/bindings/pinctrl/nvidia,tegra234-pinmux.yaml7
-rw-r--r--Documentation/devicetree/bindings/pinctrl/nxp,lpc1850-scu.txt71
-rw-r--r--Documentation/devicetree/bindings/pinctrl/nxp,lpc1850-scu.yaml79
-rw-r--r--Documentation/devicetree/bindings/pinctrl/pincfg-node.yaml22
-rw-r--r--Documentation/devicetree/bindings/pinctrl/pinctrl-mcp23s08.txt148
-rw-r--r--Documentation/devicetree/bindings/pinctrl/pinctrl-single.yaml22
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,apq8064-pinctrl.txt95
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,apq8064-pinctrl.yaml110
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,apq8084-pinctrl.txt188
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,apq8084-pinctrl.yaml129
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,glymur-tlmm.yaml133
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,ipq4019-pinctrl.txt85
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,ipq4019-pinctrl.yaml103
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,ipq5018-tlmm.yaml2
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,ipq5332-tlmm.yaml2
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,ipq5424-tlmm.yaml114
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,ipq8064-pinctrl.txt101
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,ipq8064-pinctrl.yaml108
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,ipq8074-pinctrl.yaml2
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,ipq9574-tlmm.yaml2
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,kaanapali-tlmm.yaml127
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,lpass-lpi-common.yaml2
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,mdm9607-tlmm.yaml2
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,milos-tlmm.yaml133
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,msm8660-pinctrl.yaml2
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,msm8916-pinctrl.yaml2
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,msm8917-pinctrl.yaml160
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,msm8960-pinctrl.yaml6
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,msm8974-pinctrl.yaml2
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,msm8976-pinctrl.yaml2
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,msm8994-pinctrl.yaml2
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,msm8996-pinctrl.yaml2
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,msm8998-pinctrl.yaml2
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,pmic-gpio.yaml74
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,pmic-mpp.yaml9
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,qcs404-pinctrl.yaml3
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,qcs615-tlmm.yaml124
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,qcs8300-tlmm.yaml118
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,sa8775p-tlmm.yaml10
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,sar2130p-tlmm.yaml138
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,sc7180-pinctrl.yaml2
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,sc7280-lpass-lpi-pinctrl.yaml16
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,sdm630-pinctrl.yaml2
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,sdm660-lpass-lpi-pinctrl.yaml109
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,sdm845-pinctrl.yaml3
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,sm4250-lpass-lpi-pinctrl.yaml118
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,sm4450-tlmm.yaml54
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,sm6115-lpass-lpi-pinctrl.yaml9
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,sm6125-tlmm.yaml1
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,sm6350-tlmm.yaml2
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,sm6375-tlmm.yaml2
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,sm8150-pinctrl.yaml2
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,sm8350-lpass-lpi-pinctrl.yaml2
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,sm8550-lpass-lpi-pinctrl.yaml2
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,sm8650-lpass-lpi-pinctrl.yaml8
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,sm8750-tlmm.yaml138
-rw-r--r--Documentation/devicetree/bindings/pinctrl/raspberrypi,rp1-gpio.yaml231
-rw-r--r--Documentation/devicetree/bindings/pinctrl/realtek,rtd1315e-pinctrl.yaml54
-rw-r--r--Documentation/devicetree/bindings/pinctrl/realtek,rtd1319d-pinctrl.yaml54
-rw-r--r--Documentation/devicetree/bindings/pinctrl/realtek,rtd1619b-pinctrl.yaml54
-rw-r--r--Documentation/devicetree/bindings/pinctrl/renesas,pfc.yaml4
-rw-r--r--Documentation/devicetree/bindings/pinctrl/renesas,r9a09g077-pinctrl.yaml172
-rw-r--r--Documentation/devicetree/bindings/pinctrl/renesas,rza1-ports.yaml5
-rw-r--r--Documentation/devicetree/bindings/pinctrl/renesas,rzg2l-pinctrl.yaml65
-rw-r--r--Documentation/devicetree/bindings/pinctrl/renesas,rzn1-pinctrl.yaml4
-rw-r--r--Documentation/devicetree/bindings/pinctrl/renesas,rzv2m-pinctrl.yaml2
-rw-r--r--Documentation/devicetree/bindings/pinctrl/rockchip,pinctrl.yaml6
-rw-r--r--Documentation/devicetree/bindings/pinctrl/samsung,pinctrl-wakeup-interrupt.yaml46
-rw-r--r--Documentation/devicetree/bindings/pinctrl/samsung,pinctrl.yaml35
-rw-r--r--Documentation/devicetree/bindings/pinctrl/semtech,sx1501q.yaml43
-rw-r--r--Documentation/devicetree/bindings/pinctrl/sophgo,cv1800-pinctrl.yaml122
-rw-r--r--Documentation/devicetree/bindings/pinctrl/sophgo,sg2042-pinctrl.yaml129
-rw-r--r--Documentation/devicetree/bindings/pinctrl/spacemit,k1-pinctrl.yaml142
-rw-r--r--Documentation/devicetree/bindings/pinctrl/sprd,pinctrl.txt83
-rw-r--r--Documentation/devicetree/bindings/pinctrl/sprd,sc9860-pinctrl.txt70
-rw-r--r--Documentation/devicetree/bindings/pinctrl/sprd,sc9860-pinctrl.yaml199
-rw-r--r--Documentation/devicetree/bindings/pinctrl/st,stm32-hdp.yaml187
-rw-r--r--Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml130
-rw-r--r--Documentation/devicetree/bindings/pinctrl/starfive,jh7100-pinctrl.yaml1
-rw-r--r--Documentation/devicetree/bindings/pinctrl/starfive,jh7110-aon-pinctrl.yaml2
-rw-r--r--Documentation/devicetree/bindings/pinctrl/starfive,jh7110-sys-pinctrl.yaml2
-rw-r--r--Documentation/devicetree/bindings/pinctrl/thead,th1520-pinctrl.yaml176
-rw-r--r--Documentation/devicetree/bindings/pinctrl/toshiba,visconti-pinctrl.yaml27
-rw-r--r--Documentation/devicetree/bindings/pinctrl/xlnx,pinctrl-zynq.yaml210
-rw-r--r--Documentation/devicetree/bindings/pinctrl/xlnx,versal-pinctrl.yaml279
-rw-r--r--Documentation/devicetree/bindings/pinctrl/xlnx,zynq-pinctrl.yaml217
-rw-r--r--Documentation/devicetree/bindings/pinctrl/xlnx,zynqmp-pinctrl.yaml349
-rw-r--r--Documentation/devicetree/bindings/pmem/pmem-region.txt65
-rw-r--r--Documentation/devicetree/bindings/pmem/pmem-region.yaml48
-rw-r--r--Documentation/devicetree/bindings/power/actions,owl-sps.txt21
-rw-r--r--Documentation/devicetree/bindings/power/actions,s500-sps.yaml39
-rw-r--r--Documentation/devicetree/bindings/power/allwinner,sun20i-d1-ppu.yaml3
-rw-r--r--Documentation/devicetree/bindings/power/allwinner,sun50i-h6-prcm-ppu.yaml42
-rw-r--r--Documentation/devicetree/bindings/power/amlogic,meson-sec-pwrc.yaml5
-rw-r--r--Documentation/devicetree/bindings/power/apple,pmgr-pwrstate.yaml22
-rw-r--r--Documentation/devicetree/bindings/power/domain-idle-state.yaml5
-rw-r--r--Documentation/devicetree/bindings/power/fsl,imx-gpc.yaml1
-rw-r--r--Documentation/devicetree/bindings/power/mediatek,mt8196-gpufreq.yaml117
-rw-r--r--Documentation/devicetree/bindings/power/mediatek,power-controller.yaml48
-rw-r--r--Documentation/devicetree/bindings/power/power-domain.yaml2
-rw-r--r--Documentation/devicetree/bindings/power/qcom,kpss-acc-v2.yaml4
-rw-r--r--Documentation/devicetree/bindings/power/qcom,rpmpd.yaml10
-rw-r--r--Documentation/devicetree/bindings/power/raspberrypi,bcm2835-power.yaml42
-rw-r--r--Documentation/devicetree/bindings/power/renesas,rcar-sysc.yaml2
-rw-r--r--Documentation/devicetree/bindings/power/renesas,sysc-rmobile.yaml4
-rw-r--r--Documentation/devicetree/bindings/power/reset/apple,smc-reboot.yaml40
-rw-r--r--Documentation/devicetree/bindings/power/reset/atmel,sama5d2-shdwc.yaml8
-rw-r--r--Documentation/devicetree/bindings/power/reset/mt6323-poweroff.txt20
-rw-r--r--Documentation/devicetree/bindings/power/reset/nvmem-reboot-mode.yaml4
-rw-r--r--Documentation/devicetree/bindings/power/reset/qcom,pon.yaml79
-rw-r--r--Documentation/devicetree/bindings/power/reset/reboot-mode.yaml4
-rw-r--r--Documentation/devicetree/bindings/power/reset/syscon-reboot-mode.yaml4
-rw-r--r--Documentation/devicetree/bindings/power/reset/syscon-reboot.yaml41
-rw-r--r--Documentation/devicetree/bindings/power/reset/toradex,smarc-ec.yaml52
-rw-r--r--Documentation/devicetree/bindings/power/reset/xlnx,zynqmp-power.yaml1
-rw-r--r--Documentation/devicetree/bindings/power/rockchip,power-controller.yaml8
-rw-r--r--Documentation/devicetree/bindings/power/rockchip-io-domain.yaml24
-rw-r--r--Documentation/devicetree/bindings/power/supply/active-semi,act8945a-charger.yaml76
-rw-r--r--Documentation/devicetree/bindings/power/supply/bq24190.yaml9
-rw-r--r--Documentation/devicetree/bindings/power/supply/bq2515x.yaml7
-rw-r--r--Documentation/devicetree/bindings/power/supply/bq256xx.yaml5
-rw-r--r--Documentation/devicetree/bindings/power/supply/bq25980.yaml40
-rw-r--r--Documentation/devicetree/bindings/power/supply/bq27xxx.yaml37
-rw-r--r--Documentation/devicetree/bindings/power/supply/cw2015_battery.yaml5
-rw-r--r--Documentation/devicetree/bindings/power/supply/gpio-charger.yaml6
-rw-r--r--Documentation/devicetree/bindings/power/supply/ingenic,battery.yaml14
-rw-r--r--Documentation/devicetree/bindings/power/supply/ltc4162-l.yaml24
-rw-r--r--Documentation/devicetree/bindings/power/supply/maxim,max17042.yaml1
-rw-r--r--Documentation/devicetree/bindings/power/supply/maxim,max17201.yaml58
-rw-r--r--Documentation/devicetree/bindings/power/supply/maxim,max77705.yaml50
-rw-r--r--Documentation/devicetree/bindings/power/supply/maxim,max8903.yaml2
-rw-r--r--Documentation/devicetree/bindings/power/supply/maxim,max8971.yaml68
-rw-r--r--Documentation/devicetree/bindings/power/supply/mt6360_charger.yaml1
-rw-r--r--Documentation/devicetree/bindings/power/supply/pegatron,chagall-ec.yaml49
-rw-r--r--Documentation/devicetree/bindings/power/supply/qcom,pmi8998-charger.yaml10
-rw-r--r--Documentation/devicetree/bindings/power/supply/richtek,rt5033-charger.yaml4
-rw-r--r--Documentation/devicetree/bindings/power/supply/richtek,rt9756.yaml72
-rw-r--r--Documentation/devicetree/bindings/power/supply/sc2731-charger.yaml21
-rw-r--r--Documentation/devicetree/bindings/power/supply/sc27xx-fg.yaml45
-rw-r--r--Documentation/devicetree/bindings/power/supply/st,stc3117.yaml74
-rw-r--r--Documentation/devicetree/bindings/power/supply/stericsson,ab8500-btemp.yaml4
-rw-r--r--Documentation/devicetree/bindings/power/supply/stericsson,ab8500-chargalg.yaml4
-rw-r--r--Documentation/devicetree/bindings/power/supply/stericsson,ab8500-charger.yaml5
-rw-r--r--Documentation/devicetree/bindings/power/supply/stericsson,ab8500-fg.yaml4
-rw-r--r--Documentation/devicetree/bindings/power/supply/summit,smb347-charger.yaml5
-rw-r--r--Documentation/devicetree/bindings/power/supply/ti,twl6030-charger.yaml48
-rw-r--r--Documentation/devicetree/bindings/power/supply/x-powers,axp20x-battery-power-supply.yaml23
-rw-r--r--Documentation/devicetree/bindings/power/supply/x-powers,axp20x-usb-power-supply.yaml72
-rw-r--r--Documentation/devicetree/bindings/power/wakeup-source.txt8
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/dma.txt204
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/mcu-mpc8349emitx.txt17
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/mpic.txt231
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/msi-pic.txt111
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/pmc.txt63
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/pmc.yaml152
-rw-r--r--Documentation/devicetree/bindings/powerpc/nintendo/wii.txt4
-rw-r--r--Documentation/devicetree/bindings/pps/pps-gpio.yaml22
-rw-r--r--Documentation/devicetree/bindings/ptp/fsl,ptp.yaml156
-rw-r--r--Documentation/devicetree/bindings/ptp/nxp,ptp-netc.yaml63
-rw-r--r--Documentation/devicetree/bindings/ptp/ptp-qoriq.txt87
-rw-r--r--Documentation/devicetree/bindings/pwm/adi,axi-pwmgen.yaml57
-rw-r--r--Documentation/devicetree/bindings/pwm/airoha,en7581-pwm.yaml34
-rw-r--r--Documentation/devicetree/bindings/pwm/allwinner,sun4i-a10-pwm.yaml10
-rw-r--r--Documentation/devicetree/bindings/pwm/apple,s5l-fpwm.yaml3
-rw-r--r--Documentation/devicetree/bindings/pwm/argon40,fan-hat.yaml48
-rw-r--r--Documentation/devicetree/bindings/pwm/atmel,at91sam-pwm.yaml7
-rw-r--r--Documentation/devicetree/bindings/pwm/atmel,hlcdc-pwm.yaml35
-rw-r--r--Documentation/devicetree/bindings/pwm/atmel-hlcdc-pwm.txt29
-rw-r--r--Documentation/devicetree/bindings/pwm/brcm,bcm7038-pwm.yaml8
-rw-r--r--Documentation/devicetree/bindings/pwm/brcm,kona-pwm.yaml8
-rw-r--r--Documentation/devicetree/bindings/pwm/cirrus,ep9301-pwm.yaml53
-rw-r--r--Documentation/devicetree/bindings/pwm/fsl,vf610-ftm-pwm.yaml97
-rw-r--r--Documentation/devicetree/bindings/pwm/google,cros-ec-pwm.yaml3
-rw-r--r--Documentation/devicetree/bindings/pwm/imx-pwm.yaml1
-rw-r--r--Documentation/devicetree/bindings/pwm/imx-tpm-pwm.yaml15
-rw-r--r--Documentation/devicetree/bindings/pwm/kontron,sl28cpld-pwm.yaml2
-rw-r--r--Documentation/devicetree/bindings/pwm/loongson,ls7a-pwm.yaml67
-rw-r--r--Documentation/devicetree/bindings/pwm/lpc1850-sct-pwm.txt20
-rw-r--r--Documentation/devicetree/bindings/pwm/lpc32xx-pwm.txt17
-rw-r--r--Documentation/devicetree/bindings/pwm/marvell,berlin-pwm.yaml44
-rw-r--r--Documentation/devicetree/bindings/pwm/marvell,pxa-pwm.yaml71
-rw-r--r--Documentation/devicetree/bindings/pwm/mediatek,mt2712-pwm.yaml7
-rw-r--r--Documentation/devicetree/bindings/pwm/mediatek,pwm-disp.yaml6
-rw-r--r--Documentation/devicetree/bindings/pwm/mxs-pwm.yaml1
-rw-r--r--Documentation/devicetree/bindings/pwm/nxp,lpc1850-sct-pwm.yaml54
-rw-r--r--Documentation/devicetree/bindings/pwm/nxp,lpc3220-pwm.yaml44
-rw-r--r--Documentation/devicetree/bindings/pwm/nxp,mc33xs2410.yaml118
-rw-r--r--Documentation/devicetree/bindings/pwm/opencores,pwm.yaml56
-rw-r--r--Documentation/devicetree/bindings/pwm/pwm-amlogic.yaml133
-rw-r--r--Documentation/devicetree/bindings/pwm/pwm-bcm2835.yaml1
-rw-r--r--Documentation/devicetree/bindings/pwm/pwm-berlin.txt17
-rw-r--r--Documentation/devicetree/bindings/pwm/pwm-fsl-ftm.txt55
-rw-r--r--Documentation/devicetree/bindings/pwm/pwm-gpio.yaml46
-rw-r--r--Documentation/devicetree/bindings/pwm/pwm-nexus-node.yaml65
-rw-r--r--Documentation/devicetree/bindings/pwm/pwm-rockchip.yaml2
-rw-r--r--Documentation/devicetree/bindings/pwm/pwm-samsung.yaml1
-rw-r--r--Documentation/devicetree/bindings/pwm/pwm-sprd.txt40
-rw-r--r--Documentation/devicetree/bindings/pwm/pwm.yaml6
-rw-r--r--Documentation/devicetree/bindings/pwm/pxa-pwm.txt30
-rw-r--r--Documentation/devicetree/bindings/pwm/renesas,pwm-rcar.yaml1
-rw-r--r--Documentation/devicetree/bindings/pwm/renesas,rzg2l-gpt.yaml378
-rw-r--r--Documentation/devicetree/bindings/pwm/renesas,tpu-pwm.yaml10
-rw-r--r--Documentation/devicetree/bindings/pwm/snps,dw-apb-timers-pwm2.yaml1
-rw-r--r--Documentation/devicetree/bindings/pwm/sophgo,sg2042-pwm.yaml60
-rw-r--r--Documentation/devicetree/bindings/pwm/sprd,ums512-pwm.yaml66
-rw-r--r--Documentation/devicetree/bindings/pwm/thead,th1520-pwm.yaml48
-rw-r--r--Documentation/devicetree/bindings/pwm/ti,twl-pwm.txt17
-rw-r--r--Documentation/devicetree/bindings/pwm/ti,twl-pwmled.txt17
-rw-r--r--Documentation/devicetree/bindings/pwm/via,vt8500-pwm.yaml43
-rw-r--r--Documentation/devicetree/bindings/pwm/vt8500-pwm.txt18
-rw-r--r--Documentation/devicetree/bindings/regulator/active-semi,act8945a.yaml25
-rw-r--r--Documentation/devicetree/bindings/regulator/adi,adp5055-regulator.yaml157
-rw-r--r--Documentation/devicetree/bindings/regulator/allwinner,sun20i-d1-system-ldos.yaml37
-rw-r--r--Documentation/devicetree/bindings/regulator/brcm,bcm59054.yaml56
-rw-r--r--Documentation/devicetree/bindings/regulator/brcm,bcm59056.yaml51
-rw-r--r--Documentation/devicetree/bindings/regulator/da9211.txt205
-rw-r--r--Documentation/devicetree/bindings/regulator/dlg,da9211.yaml103
-rw-r--r--Documentation/devicetree/bindings/regulator/fitipower,fp9931.yaml110
-rw-r--r--Documentation/devicetree/bindings/regulator/fixed-regulator.yaml7
-rw-r--r--Documentation/devicetree/bindings/regulator/gpio-regulator.yaml4
-rw-r--r--Documentation/devicetree/bindings/regulator/infineon,ir38060.yaml45
-rw-r--r--Documentation/devicetree/bindings/regulator/lltc,ltc3676.yaml167
-rw-r--r--Documentation/devicetree/bindings/regulator/ltc3676.txt94
-rw-r--r--Documentation/devicetree/bindings/regulator/maxim,max77838.yaml68
-rw-r--r--Documentation/devicetree/bindings/regulator/mcp16502-regulator.txt144
-rw-r--r--Documentation/devicetree/bindings/regulator/mediatek,mt6316b-regulator.yaml76
-rw-r--r--Documentation/devicetree/bindings/regulator/mediatek,mt6316c-regulator.yaml76
-rw-r--r--Documentation/devicetree/bindings/regulator/mediatek,mt6316d-regulator.yaml75
-rw-r--r--Documentation/devicetree/bindings/regulator/mediatek,mt6331-regulator.yaml19
-rw-r--r--Documentation/devicetree/bindings/regulator/mediatek,mt6332-regulator.yaml7
-rw-r--r--Documentation/devicetree/bindings/regulator/mediatek,mt6357-regulator.yaml12
-rw-r--r--Documentation/devicetree/bindings/regulator/mediatek,mt6363-regulator.yaml146
-rw-r--r--Documentation/devicetree/bindings/regulator/mediatek,mt6397-regulator.yaml238
-rw-r--r--Documentation/devicetree/bindings/regulator/mediatek,mt6873-dvfsrc-regulator.yaml45
-rw-r--r--Documentation/devicetree/bindings/regulator/microchip,mcp16502.yaml200
-rw-r--r--Documentation/devicetree/bindings/regulator/mt6315-regulator.yaml12
-rw-r--r--Documentation/devicetree/bindings/regulator/mt6397-regulator.txt220
-rw-r--r--Documentation/devicetree/bindings/regulator/nxp,pca9450-regulator.yaml107
-rw-r--r--Documentation/devicetree/bindings/regulator/nxp,pf0900.yaml163
-rw-r--r--Documentation/devicetree/bindings/regulator/nxp,pf5300.yaml54
-rw-r--r--Documentation/devicetree/bindings/regulator/qcom,qca6390-pmu.yaml244
-rw-r--r--Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.yaml92
-rw-r--r--Documentation/devicetree/bindings/regulator/qcom,sdm845-refgen-regulator.yaml3
-rw-r--r--Documentation/devicetree/bindings/regulator/qcom,smd-rpm-regulator.yaml2
-rw-r--r--Documentation/devicetree/bindings/regulator/qcom,usb-vbus-regulator.yaml12
-rw-r--r--Documentation/devicetree/bindings/regulator/qcom-labibb-regulator.yaml7
-rw-r--r--Documentation/devicetree/bindings/regulator/raspberrypi,7inch-touchscreen-panel-regulator-v2.yaml61
-rw-r--r--Documentation/devicetree/bindings/regulator/regulator.yaml3
-rw-r--r--Documentation/devicetree/bindings/regulator/richtek,rt5133.yaml178
-rw-r--r--Documentation/devicetree/bindings/regulator/richtek,rt6245-regulator.yaml1
-rw-r--r--Documentation/devicetree/bindings/regulator/richtek,rtq2208.yaml14
-rw-r--r--Documentation/devicetree/bindings/regulator/rohm,bd96801-regulator.yaml63
-rw-r--r--Documentation/devicetree/bindings/regulator/rohm,bd96802-regulator.yaml44
-rw-r--r--Documentation/devicetree/bindings/regulator/samsung,s2mpu05.yaml47
-rw-r--r--Documentation/devicetree/bindings/regulator/sprd,sc2731-regulator.txt43
-rw-r--r--Documentation/devicetree/bindings/regulator/sprd,sc2731-regulator.yaml46
-rw-r--r--Documentation/devicetree/bindings/regulator/st,stm32-vrefbuf.yaml4
-rw-r--r--Documentation/devicetree/bindings/regulator/st,stm32mp1-pwr-reg.yaml7
-rw-r--r--Documentation/devicetree/bindings/regulator/ti,tps62864.yaml2
-rw-r--r--Documentation/devicetree/bindings/regulator/ti,tps65132.yaml87
-rw-r--r--Documentation/devicetree/bindings/regulator/ti,tps65219.yaml27
-rw-r--r--Documentation/devicetree/bindings/regulator/tps65132-regulator.txt46
-rw-r--r--Documentation/devicetree/bindings/regulator/twl-regulator.txt80
-rw-r--r--Documentation/devicetree/bindings/regulator/vctrl-regulator.yaml80
-rw-r--r--Documentation/devicetree/bindings/regulator/vctrl.txt49
-rw-r--r--Documentation/devicetree/bindings/remoteproc/fsl,imx-rproc.yaml15
-rw-r--r--Documentation/devicetree/bindings/remoteproc/mtk,scp.yaml12
-rw-r--r--Documentation/devicetree/bindings/remoteproc/qcom,adsp.yaml26
-rw-r--r--Documentation/devicetree/bindings/remoteproc/qcom,glink-rpm-edge.yaml3
-rw-r--r--Documentation/devicetree/bindings/remoteproc/qcom,milos-pas.yaml198
-rw-r--r--Documentation/devicetree/bindings/remoteproc/qcom,msm8916-mss-pil.yaml64
-rw-r--r--Documentation/devicetree/bindings/remoteproc/qcom,msm8996-mss-pil.yaml1
-rw-r--r--Documentation/devicetree/bindings/remoteproc/qcom,qcs404-cdsp-pil.yaml6
-rw-r--r--Documentation/devicetree/bindings/remoteproc/qcom,qcs404-pas.yaml2
-rw-r--r--Documentation/devicetree/bindings/remoteproc/qcom,rpm-proc.yaml6
-rw-r--r--Documentation/devicetree/bindings/remoteproc/qcom,sa8775p-pas.yaml176
-rw-r--r--Documentation/devicetree/bindings/remoteproc/qcom,sc7180-pas.yaml2
-rw-r--r--Documentation/devicetree/bindings/remoteproc/qcom,sc7280-wpss-pil.yaml8
-rw-r--r--Documentation/devicetree/bindings/remoteproc/qcom,sc8180x-pas.yaml96
-rw-r--r--Documentation/devicetree/bindings/remoteproc/qcom,sc8280xp-pas.yaml4
-rw-r--r--Documentation/devicetree/bindings/remoteproc/qcom,sdm845-adsp-pil.yaml6
-rw-r--r--Documentation/devicetree/bindings/remoteproc/qcom,sm6115-pas.yaml7
-rw-r--r--Documentation/devicetree/bindings/remoteproc/qcom,sm6350-pas.yaml2
-rw-r--r--Documentation/devicetree/bindings/remoteproc/qcom,sm6375-pas.yaml2
-rw-r--r--Documentation/devicetree/bindings/remoteproc/qcom,sm8150-pas.yaml61
-rw-r--r--Documentation/devicetree/bindings/remoteproc/qcom,sm8350-pas.yaml53
-rw-r--r--Documentation/devicetree/bindings/remoteproc/qcom,sm8550-pas.yaml126
-rw-r--r--Documentation/devicetree/bindings/remoteproc/qcom,smd-edge.yaml3
-rw-r--r--Documentation/devicetree/bindings/remoteproc/qcom,wcnss-pil.yaml47
-rw-r--r--Documentation/devicetree/bindings/remoteproc/st,stm32-rproc.yaml4
-rw-r--r--Documentation/devicetree/bindings/remoteproc/ti,davinci-rproc.txt3
-rw-r--r--Documentation/devicetree/bindings/remoteproc/ti,k3-dsp-rproc.yaml89
-rw-r--r--Documentation/devicetree/bindings/remoteproc/ti,k3-m4f-rproc.yaml125
-rw-r--r--Documentation/devicetree/bindings/remoteproc/ti,k3-r5f-rproc.yaml2
-rw-r--r--Documentation/devicetree/bindings/remoteproc/xlnx,zynqmp-r5fss.yaml280
-rw-r--r--Documentation/devicetree/bindings/reserved-memory/nvidia,tegra264-bpmp-shmem.yaml15
-rw-r--r--Documentation/devicetree/bindings/reset/amlogic,meson-reset.yaml21
-rw-r--r--Documentation/devicetree/bindings/reset/atmel,at91sam9260-reset.yaml7
-rw-r--r--Documentation/devicetree/bindings/reset/brcm,bcm6345-reset.yaml4
-rw-r--r--Documentation/devicetree/bindings/reset/canaan,k230-rst.yaml39
-rw-r--r--Documentation/devicetree/bindings/reset/eswin,eic7700-reset.yaml42
-rw-r--r--Documentation/devicetree/bindings/reset/microchip,rst.yaml11
-rw-r--r--Documentation/devicetree/bindings/reset/nuvoton,ma35d1-reset.yaml3
-rw-r--r--Documentation/devicetree/bindings/reset/nuvoton,npcm750-reset.yaml18
-rw-r--r--Documentation/devicetree/bindings/reset/nxp,lpc1850-rgu.txt83
-rw-r--r--Documentation/devicetree/bindings/reset/nxp,lpc1850-rgu.yaml101
-rw-r--r--Documentation/devicetree/bindings/reset/renesas,rst.yaml2
-rw-r--r--Documentation/devicetree/bindings/reset/renesas,rzg2l-usbphy-ctrl.yaml51
-rw-r--r--Documentation/devicetree/bindings/reset/renesas,rzv2h-usb2phy-reset.yaml61
-rw-r--r--Documentation/devicetree/bindings/reset/snps,dw-reset.txt30
-rw-r--r--Documentation/devicetree/bindings/reset/snps,dw-reset.yaml39
-rw-r--r--Documentation/devicetree/bindings/reset/socionext,uniphier-glue-reset.yaml8
-rw-r--r--Documentation/devicetree/bindings/reset/sophgo,sg2042-reset.yaml42
-rw-r--r--Documentation/devicetree/bindings/reset/st,stm32-rcc.txt2
-rw-r--r--Documentation/devicetree/bindings/reset/thead,th1520-reset.yaml50
-rw-r--r--Documentation/devicetree/bindings/reset/ti,sci-reset.yaml3
-rw-r--r--Documentation/devicetree/bindings/reset/xlnx,zynqmp-reset.yaml2
-rw-r--r--Documentation/devicetree/bindings/riscv/andes.yaml25
-rw-r--r--Documentation/devicetree/bindings/riscv/anlogic.yaml27
-rw-r--r--Documentation/devicetree/bindings/riscv/cpus.yaml46
-rw-r--r--Documentation/devicetree/bindings/riscv/eswin.yaml29
-rw-r--r--Documentation/devicetree/bindings/riscv/extensions.yaml423
-rw-r--r--Documentation/devicetree/bindings/riscv/microchip.yaml14
-rw-r--r--Documentation/devicetree/bindings/riscv/sophgo.yaml36
-rw-r--r--Documentation/devicetree/bindings/riscv/spacemit.yaml32
-rw-r--r--Documentation/devicetree/bindings/riscv/starfive.yaml12
-rw-r--r--Documentation/devicetree/bindings/riscv/tenstorrent.yaml28
-rw-r--r--Documentation/devicetree/bindings/rng/SUNW,n2-rng.yaml50
-rw-r--r--Documentation/devicetree/bindings/rng/airoha,en7581-trng.yaml38
-rw-r--r--Documentation/devicetree/bindings/rng/amlogic,meson-rng.yaml3
-rw-r--r--Documentation/devicetree/bindings/rng/atmel,at91-trng.yaml5
-rw-r--r--Documentation/devicetree/bindings/rng/brcm,bcm74110-rng.yaml35
-rw-r--r--Documentation/devicetree/bindings/rng/brcm,iproc-rng200.yaml6
-rw-r--r--Documentation/devicetree/bindings/rng/hisi-rng.txt12
-rw-r--r--Documentation/devicetree/bindings/rng/hisi-rng.yaml32
-rw-r--r--Documentation/devicetree/bindings/rng/imx-rng.yaml2
-rw-r--r--Documentation/devicetree/bindings/rng/inside-secure,safexcel-eip76.yaml84
-rw-r--r--Documentation/devicetree/bindings/rng/microchip,pic32-rng.txt17
-rw-r--r--Documentation/devicetree/bindings/rng/microchip,pic32-rng.yaml40
-rw-r--r--Documentation/devicetree/bindings/rng/microsoft,vmgenid.yaml49
-rw-r--r--Documentation/devicetree/bindings/rng/omap_rng.yaml81
-rw-r--r--Documentation/devicetree/bindings/rng/rockchip,rk3568-rng.yaml61
-rw-r--r--Documentation/devicetree/bindings/rng/rockchip,rk3588-rng.yaml60
-rw-r--r--Documentation/devicetree/bindings/rng/samsung,exynos5250-trng.yaml40
-rw-r--r--Documentation/devicetree/bindings/rng/sparc_sun_oracle_rng.txt30
-rw-r--r--Documentation/devicetree/bindings/rng/st,stm32-rng.yaml32
-rw-r--r--Documentation/devicetree/bindings/rtc/abracon,abx80x.txt31
-rw-r--r--Documentation/devicetree/bindings/rtc/abracon,abx80x.yaml98
-rw-r--r--Documentation/devicetree/bindings/rtc/adi,max31335.yaml4
-rw-r--r--Documentation/devicetree/bindings/rtc/allwinner,sun6i-a31-rtc.yaml4
-rw-r--r--Documentation/devicetree/bindings/rtc/alphascale,asm9260-rtc.txt19
-rw-r--r--Documentation/devicetree/bindings/rtc/alphascale,asm9260-rtc.yaml50
-rw-r--r--Documentation/devicetree/bindings/rtc/amlogic,a4-rtc.yaml68
-rw-r--r--Documentation/devicetree/bindings/rtc/apm,xgene-rtc.yaml45
-rw-r--r--Documentation/devicetree/bindings/rtc/armada-380-rtc.txt24
-rw-r--r--Documentation/devicetree/bindings/rtc/atmel,at91rm9200-rtc.yaml4
-rw-r--r--Documentation/devicetree/bindings/rtc/atmel,at91sam9260-rtt.yaml5
-rw-r--r--Documentation/devicetree/bindings/rtc/digicolor-rtc.txt17
-rw-r--r--Documentation/devicetree/bindings/rtc/fsl,ls-ftm-alarm.yaml73
-rw-r--r--Documentation/devicetree/bindings/rtc/fsl,stmp3xxx-rtc.yaml51
-rw-r--r--Documentation/devicetree/bindings/rtc/google,goldfish-rtc.txt17
-rw-r--r--Documentation/devicetree/bindings/rtc/isil,isl12057.txt74
-rw-r--r--Documentation/devicetree/bindings/rtc/lpc32xx-rtc.txt15
-rw-r--r--Documentation/devicetree/bindings/rtc/marvell,armada-380-rtc.yaml51
-rw-r--r--Documentation/devicetree/bindings/rtc/marvell,pxa-rtc.yaml40
-rw-r--r--Documentation/devicetree/bindings/rtc/maxim,ds1742.txt12
-rw-r--r--Documentation/devicetree/bindings/rtc/mediatek,mt2712-rtc.yaml39
-rw-r--r--Documentation/devicetree/bindings/rtc/mediatek,mt7622-rtc.yaml52
-rw-r--r--Documentation/devicetree/bindings/rtc/microchip,mfps-rtc.yaml67
-rw-r--r--Documentation/devicetree/bindings/rtc/microchip,mpfs-rtc.yaml69
-rw-r--r--Documentation/devicetree/bindings/rtc/microcrystal,rv3028.yaml3
-rw-r--r--Documentation/devicetree/bindings/rtc/nvidia,tegra20-rtc.yaml1
-rw-r--r--Documentation/devicetree/bindings/rtc/nxp,lpc1788-rtc.txt21
-rw-r--r--Documentation/devicetree/bindings/rtc/nxp,lpc1788-rtc.yaml63
-rw-r--r--Documentation/devicetree/bindings/rtc/nxp,lpc3220-rtc.yaml49
-rw-r--r--Documentation/devicetree/bindings/rtc/nxp,pcf2127.yaml3
-rw-r--r--Documentation/devicetree/bindings/rtc/nxp,pcf85063.yaml33
-rw-r--r--Documentation/devicetree/bindings/rtc/nxp,s32g-rtc.yaml72
-rw-r--r--Documentation/devicetree/bindings/rtc/orion-rtc.txt18
-rw-r--r--Documentation/devicetree/bindings/rtc/pxa-rtc.txt14
-rw-r--r--Documentation/devicetree/bindings/rtc/qcom-pm8xxx-rtc.yaml11
-rw-r--r--Documentation/devicetree/bindings/rtc/renesas,rz-rtca3.yaml84
-rw-r--r--Documentation/devicetree/bindings/rtc/renesas,rzn1-rtc.yaml30
-rw-r--r--Documentation/devicetree/bindings/rtc/rtc-aspeed.txt22
-rw-r--r--Documentation/devicetree/bindings/rtc/rtc-fsl-ftm-alarm.txt36
-rw-r--r--Documentation/devicetree/bindings/rtc/rtc-mt2712.txt14
-rw-r--r--Documentation/devicetree/bindings/rtc/rtc-mt6397.txt31
-rw-r--r--Documentation/devicetree/bindings/rtc/rtc-mt7622.txt21
-rw-r--r--Documentation/devicetree/bindings/rtc/rtc-mxc.yaml10
-rw-r--r--Documentation/devicetree/bindings/rtc/s3c-rtc.yaml40
-rw-r--r--Documentation/devicetree/bindings/rtc/sa1100-rtc.yaml2
-rw-r--r--Documentation/devicetree/bindings/rtc/sophgo,cv1800b-rtc.yaml86
-rw-r--r--Documentation/devicetree/bindings/rtc/spear-rtc.txt15
-rw-r--r--Documentation/devicetree/bindings/rtc/sprd,sc2731-rtc.yaml33
-rw-r--r--Documentation/devicetree/bindings/rtc/sprd,sc27xx-rtc.txt26
-rw-r--r--Documentation/devicetree/bindings/rtc/st,stm32-rtc.yaml33
-rw-r--r--Documentation/devicetree/bindings/rtc/stmp3xxx-rtc.txt21
-rw-r--r--Documentation/devicetree/bindings/rtc/trivial-rtc.yaml34
-rw-r--r--Documentation/devicetree/bindings/rtc/twl-rtc.txt11
-rw-r--r--Documentation/devicetree/bindings/rtc/via,vt8500-rtc.txt15
-rw-r--r--Documentation/devicetree/bindings/rtc/xgene-rtc.txt28
-rw-r--r--Documentation/devicetree/bindings/rtc/xlnx,zynqmp-rtc.yaml11
-rw-r--r--Documentation/devicetree/bindings/serial/8250.yaml93
-rw-r--r--Documentation/devicetree/bindings/serial/8250_omap.yaml24
-rw-r--r--Documentation/devicetree/bindings/serial/actions,owl-uart.txt16
-rw-r--r--Documentation/devicetree/bindings/serial/actions,owl-uart.yaml48
-rw-r--r--Documentation/devicetree/bindings/serial/altera_jtaguart.txt5
-rw-r--r--Documentation/devicetree/bindings/serial/altera_uart.txt8
-rw-r--r--Documentation/devicetree/bindings/serial/altr,juart-1.0.yaml19
-rw-r--r--Documentation/devicetree/bindings/serial/altr,uart-1.0.yaml25
-rw-r--r--Documentation/devicetree/bindings/serial/amlogic,meson-uart.yaml7
-rw-r--r--Documentation/devicetree/bindings/serial/arc-uart.txt25
-rw-r--r--Documentation/devicetree/bindings/serial/arm,mps2-uart.txt19
-rw-r--r--Documentation/devicetree/bindings/serial/arm,mps2-uart.yaml46
-rw-r--r--Documentation/devicetree/bindings/serial/arm,sbsa-uart.yaml38
-rw-r--r--Documentation/devicetree/bindings/serial/arm_sbsa_uart.txt10
-rw-r--r--Documentation/devicetree/bindings/serial/atmel,at91-usart.yaml12
-rw-r--r--Documentation/devicetree/bindings/serial/brcm,bcm2835-aux-uart.txt18
-rw-r--r--Documentation/devicetree/bindings/serial/brcm,bcm2835-aux-uart.yaml46
-rw-r--r--Documentation/devicetree/bindings/serial/brcm,bcm7271-uart.yaml2
-rw-r--r--Documentation/devicetree/bindings/serial/cdns,uart.yaml11
-rw-r--r--Documentation/devicetree/bindings/serial/cirrus,clps711x-uart.txt31
-rw-r--r--Documentation/devicetree/bindings/serial/cirrus,ep7209-uart.yaml56
-rw-r--r--Documentation/devicetree/bindings/serial/cnxt,cx92755-usart.yaml48
-rw-r--r--Documentation/devicetree/bindings/serial/digicolor-usart.txt27
-rw-r--r--Documentation/devicetree/bindings/serial/fsl,s32-linflexuart.yaml4
-rw-r--r--Documentation/devicetree/bindings/serial/fsl-lpuart.yaml2
-rw-r--r--Documentation/devicetree/bindings/serial/lantiq,asc.yaml56
-rw-r--r--Documentation/devicetree/bindings/serial/lantiq_asc.txt31
-rw-r--r--Documentation/devicetree/bindings/serial/marvell,armada-3700-uart.yaml102
-rw-r--r--Documentation/devicetree/bindings/serial/mediatek,uart.yaml4
-rw-r--r--Documentation/devicetree/bindings/serial/microchip,pic32-uart.txt29
-rw-r--r--Documentation/devicetree/bindings/serial/microchip,pic32mzda-uart.yaml53
-rw-r--r--Documentation/devicetree/bindings/serial/milbeaut-uart.txt21
-rw-r--r--Documentation/devicetree/bindings/serial/mrvl,pxa-ssp.txt64
-rw-r--r--Documentation/devicetree/bindings/serial/mvebu-uart.txt56
-rw-r--r--Documentation/devicetree/bindings/serial/nvidia,tegra20-hsuart.yaml2
-rw-r--r--Documentation/devicetree/bindings/serial/nvidia,tegra264-utc.yaml73
-rw-r--r--Documentation/devicetree/bindings/serial/nxp,lpc3220-hsuart.yaml39
-rw-r--r--Documentation/devicetree/bindings/serial/nxp,sc16is7xx.yaml8
-rw-r--r--Documentation/devicetree/bindings/serial/nxp-lpc32xx-hsuart.txt14
-rw-r--r--Documentation/devicetree/bindings/serial/pl011.yaml3
-rw-r--r--Documentation/devicetree/bindings/serial/qcom,msm-uart.yaml2
-rw-r--r--Documentation/devicetree/bindings/serial/qcom,msm-uartdm.yaml2
-rw-r--r--Documentation/devicetree/bindings/serial/qcom,sa8255p-geni-uart.yaml69
-rw-r--r--Documentation/devicetree/bindings/serial/qcom,serial-geni-qcom.yaml1
-rw-r--r--Documentation/devicetree/bindings/serial/renesas,hscif.yaml8
-rw-r--r--Documentation/devicetree/bindings/serial/renesas,rsci.yaml83
-rw-r--r--Documentation/devicetree/bindings/serial/renesas,scif.yaml152
-rw-r--r--Documentation/devicetree/bindings/serial/rs485.yaml19
-rw-r--r--Documentation/devicetree/bindings/serial/samsung_uart.yaml92
-rw-r--r--Documentation/devicetree/bindings/serial/serial-peripheral-props.yaml41
-rw-r--r--Documentation/devicetree/bindings/serial/serial.yaml26
-rw-r--r--Documentation/devicetree/bindings/serial/snps,arc-uart.yaml51
-rw-r--r--Documentation/devicetree/bindings/serial/snps-dw-apb-uart.yaml57
-rw-r--r--Documentation/devicetree/bindings/serial/socionext,milbeaut-usio-uart.yaml56
-rw-r--r--Documentation/devicetree/bindings/serial/sprd-uart.yaml8
-rw-r--r--Documentation/devicetree/bindings/serial/st,asc.yaml55
-rw-r--r--Documentation/devicetree/bindings/serial/st,stm32-uart.yaml7
-rw-r--r--Documentation/devicetree/bindings/serial/st-asc.txt18
-rw-r--r--Documentation/devicetree/bindings/serial/via,vt8500-uart.yaml46
-rw-r--r--Documentation/devicetree/bindings/serial/vt8500-uart.txt27
-rw-r--r--Documentation/devicetree/bindings/siox/eckelmann,siox-gpio.txt19
-rw-r--r--Documentation/devicetree/bindings/siox/eckelmann,siox-gpio.yaml48
-rw-r--r--Documentation/devicetree/bindings/slimbus/qcom,slim-ngd.yaml2
-rw-r--r--Documentation/devicetree/bindings/slimbus/qcom,slim.yaml86
-rw-r--r--Documentation/devicetree/bindings/slimbus/slimbus.yaml29
-rw-r--r--Documentation/devicetree/bindings/soc/altera/altr,sys-mgr.yaml51
-rw-r--r--Documentation/devicetree/bindings/soc/amlogic/amlogic,meson-gx-clk-measure.yaml2
-rw-r--r--Documentation/devicetree/bindings/soc/amlogic/amlogic,meson-gx-hhi-sysctrl.yaml42
-rw-r--r--Documentation/devicetree/bindings/soc/bcm/brcm,bcm2711-avs-monitor.yaml44
-rw-r--r--Documentation/devicetree/bindings/soc/bcm/brcm,bcm2835-pm.yaml38
-rw-r--r--Documentation/devicetree/bindings/soc/bcm/raspberrypi,bcm2835-power.txt47
-rw-r--r--Documentation/devicetree/bindings/soc/cirrus/cirrus,ep9301-syscon.yaml94
-rw-r--r--Documentation/devicetree/bindings/soc/fsl/bman-portals.txt56
-rw-r--r--Documentation/devicetree/bindings/soc/fsl/bman.txt137
-rw-r--r--Documentation/devicetree/bindings/soc/fsl/cpm_qe/fsl,qe-firmware.yaml48
-rw-r--r--Documentation/devicetree/bindings/soc/fsl/cpm_qe/fsl,qe-ic.yaml47
-rw-r--r--Documentation/devicetree/bindings/soc/fsl/cpm_qe/fsl,qe-muram.yaml70
-rw-r--r--Documentation/devicetree/bindings/soc/fsl/cpm_qe/fsl,qe-si.yaml40
-rw-r--r--Documentation/devicetree/bindings/soc/fsl/cpm_qe/fsl,qe-siram.yaml39
-rw-r--r--Documentation/devicetree/bindings/soc/fsl/cpm_qe/fsl,qe-tsa.yaml210
-rw-r--r--Documentation/devicetree/bindings/soc/fsl/cpm_qe/fsl,qe-ucc-qmc.yaml197
-rw-r--r--Documentation/devicetree/bindings/soc/fsl/cpm_qe/fsl,qe.yaml148
-rw-r--r--Documentation/devicetree/bindings/soc/fsl/cpm_qe/fsl,ucc-hdlc.yaml140
-rw-r--r--Documentation/devicetree/bindings/soc/fsl/cpm_qe/network.txt130
-rw-r--r--Documentation/devicetree/bindings/soc/fsl/cpm_qe/qe.txt178
-rw-r--r--Documentation/devicetree/bindings/soc/fsl/fsl,bman-portal.yaml52
-rw-r--r--Documentation/devicetree/bindings/soc/fsl/fsl,bman.yaml83
-rw-r--r--Documentation/devicetree/bindings/soc/fsl/fsl,imx23-digctl.yaml53
-rw-r--r--Documentation/devicetree/bindings/soc/fsl/fsl,layerscape-dcfg.yaml3
-rw-r--r--Documentation/devicetree/bindings/soc/fsl/fsl,layerscape-scfg.yaml6
-rw-r--r--Documentation/devicetree/bindings/soc/fsl/fsl,ls1028a-reset.yaml56
-rw-r--r--Documentation/devicetree/bindings/soc/fsl/fsl,qman-fqd.yaml69
-rw-r--r--Documentation/devicetree/bindings/soc/fsl/fsl,qman-portal.yaml112
-rw-r--r--Documentation/devicetree/bindings/soc/fsl/fsl,qman.yaml93
-rw-r--r--Documentation/devicetree/bindings/soc/fsl/fsl,rcpm.yaml87
-rw-r--r--Documentation/devicetree/bindings/soc/fsl/fsl,vf610-src.yaml47
-rw-r--r--Documentation/devicetree/bindings/soc/fsl/qman-portals.txt134
-rw-r--r--Documentation/devicetree/bindings/soc/fsl/qman.txt187
-rw-r--r--Documentation/devicetree/bindings/soc/fsl/rcpm.txt69
-rw-r--r--Documentation/devicetree/bindings/soc/google/google,gs101-pmu-intr-gen.yaml35
-rw-r--r--Documentation/devicetree/bindings/soc/hisilicon/hisilicon,hi3660-usb3-otg-bc.yaml46
-rw-r--r--Documentation/devicetree/bindings/soc/imx/fsl,aips-bus.yaml5
-rw-r--r--Documentation/devicetree/bindings/soc/imx/fsl,imx-anatop.yaml146
-rw-r--r--Documentation/devicetree/bindings/soc/imx/fsl,imx-iomuxc-gpr.yaml35
-rw-r--r--Documentation/devicetree/bindings/soc/imx/fsl,imx8mp-hdmi-blk-ctrl.yaml22
-rw-r--r--Documentation/devicetree/bindings/soc/imx/fsl,imx93-media-blk-ctrl.yaml59
-rw-r--r--Documentation/devicetree/bindings/soc/intel/intel,lgm-syscon.yaml57
-rw-r--r--Documentation/devicetree/bindings/soc/mediatek/mediatek,mt8183-dvfsrc.yaml84
-rw-r--r--Documentation/devicetree/bindings/soc/mediatek/mediatek,mutex.yaml2
-rw-r--r--Documentation/devicetree/bindings/soc/mediatek/mediatek,pwrap.yaml16
-rw-r--r--Documentation/devicetree/bindings/soc/mediatek/scpsys.txt1
-rw-r--r--Documentation/devicetree/bindings/soc/microchip/atmel,at91rm9200-tcb.yaml29
-rw-r--r--Documentation/devicetree/bindings/soc/microchip/microchip,mpfs-mss-top-sysreg.yaml58
-rw-r--r--Documentation/devicetree/bindings/soc/microchip/microchip,sparx5-cpu-syscon.yaml49
-rw-r--r--Documentation/devicetree/bindings/soc/mobileye/mobileye,eyeq5-olb.yaml352
-rw-r--r--Documentation/devicetree/bindings/soc/qcom/qcom,aoss-qmp.yaml10
-rw-r--r--Documentation/devicetree/bindings/soc/qcom/qcom,dcc.yaml1
-rw-r--r--Documentation/devicetree/bindings/soc/qcom/qcom,eud.yaml38
-rw-r--r--Documentation/devicetree/bindings/soc/qcom/qcom,geni-se.yaml5
-rw-r--r--Documentation/devicetree/bindings/soc/qcom/qcom,gsbi.yaml2
-rw-r--r--Documentation/devicetree/bindings/soc/qcom/qcom,pbs.yaml46
-rw-r--r--Documentation/devicetree/bindings/soc/qcom/qcom,pmic-glink.yaml36
-rw-r--r--Documentation/devicetree/bindings/soc/qcom/qcom,rpm-master-stats.yaml4
-rw-r--r--Documentation/devicetree/bindings/soc/qcom/qcom,rpm.yaml15
-rw-r--r--Documentation/devicetree/bindings/soc/qcom/qcom,rpmh-rsc.yaml26
-rw-r--r--Documentation/devicetree/bindings/soc/qcom/qcom,sa8255p-geni-se-qup.yaml107
-rw-r--r--Documentation/devicetree/bindings/soc/qcom/qcom,saw2.yaml120
-rw-r--r--Documentation/devicetree/bindings/soc/qcom/qcom,se-common-props.yaml26
-rw-r--r--Documentation/devicetree/bindings/soc/qcom/qcom,smd-rpm.yaml74
-rw-r--r--Documentation/devicetree/bindings/soc/qcom/qcom,smd.yaml4
-rw-r--r--Documentation/devicetree/bindings/soc/qcom/qcom,smp2p.yaml8
-rw-r--r--Documentation/devicetree/bindings/soc/qcom/qcom,smsm.yaml32
-rw-r--r--Documentation/devicetree/bindings/soc/qcom/qcom,spm.yaml85
-rw-r--r--Documentation/devicetree/bindings/soc/qcom/qcom,wcnss.yaml9
-rw-r--r--Documentation/devicetree/bindings/soc/renesas/renesas,r9a09g057-sys.yaml55
-rw-r--r--Documentation/devicetree/bindings/soc/renesas/renesas-soc.yaml73
-rw-r--r--Documentation/devicetree/bindings/soc/renesas/renesas.yaml137
-rw-r--r--Documentation/devicetree/bindings/soc/rockchip/grf.yaml105
-rw-r--r--Documentation/devicetree/bindings/soc/samsung/exynos-pmu.yaml25
-rw-r--r--Documentation/devicetree/bindings/soc/samsung/exynos-usi.yaml121
-rw-r--r--Documentation/devicetree/bindings/soc/samsung/samsung,exynos-sysreg.yaml41
-rw-r--r--Documentation/devicetree/bindings/soc/sophgo/sophgo,cv1800b-top-syscon.yaml80
-rw-r--r--Documentation/devicetree/bindings/soc/sophgo/sophgo,sg2044-top-syscon.yaml49
-rw-r--r--Documentation/devicetree/bindings/soc/sophgo/sophgo.yaml52
-rw-r--r--Documentation/devicetree/bindings/soc/spacemit/spacemit,k1-syscon.yaml95
-rw-r--r--Documentation/devicetree/bindings/soc/sprd/sprd,sc9863a-glbregs.yaml55
-rw-r--r--Documentation/devicetree/bindings/soc/sti/st,sti-syscon.yaml9
-rw-r--r--Documentation/devicetree/bindings/soc/tegra/nvidia,tegra20-pmc.yaml12
-rw-r--r--Documentation/devicetree/bindings/soc/ti/sci-pm-domain.yaml2
-rw-r--r--Documentation/devicetree/bindings/soc/ti/ti,am654-serdes-ctrl.yaml43
-rw-r--r--Documentation/devicetree/bindings/soc/ti/ti,j721e-system-controller.yaml (renamed from Documentation/devicetree/bindings/mfd/ti,j721e-system-controller.yaml)27
-rw-r--r--Documentation/devicetree/bindings/soc/ti/ti,j784s4-bist.yaml63
-rw-r--r--Documentation/devicetree/bindings/soc/ti/ti,pruss.yaml42
-rw-r--r--Documentation/devicetree/bindings/soc/ti/wkup-m3-ipc.yaml32
-rw-r--r--Documentation/devicetree/bindings/soc/xilinx/xilinx.yaml162
-rw-r--r--Documentation/devicetree/bindings/soc/xilinx/xlnx,vcu.txt26
-rw-r--r--Documentation/devicetree/bindings/sound/adi,adau1372.yaml1
-rw-r--r--Documentation/devicetree/bindings/sound/adi,adau1373.yaml111
-rw-r--r--Documentation/devicetree/bindings/sound/adi,adau7002.yaml2
-rw-r--r--Documentation/devicetree/bindings/sound/adi,adau7118.yaml1
-rw-r--r--Documentation/devicetree/bindings/sound/adi,max98363.yaml60
-rw-r--r--Documentation/devicetree/bindings/sound/adi,ssm2518.yaml20
-rw-r--r--Documentation/devicetree/bindings/sound/adi,ssm2602.txt19
-rw-r--r--Documentation/devicetree/bindings/sound/adi,ssm3515.yaml49
-rw-r--r--Documentation/devicetree/bindings/sound/ak4104.txt25
-rw-r--r--Documentation/devicetree/bindings/sound/ak4375.yaml60
-rw-r--r--Documentation/devicetree/bindings/sound/ak4554.txt11
-rw-r--r--Documentation/devicetree/bindings/sound/ak4613.yaml59
-rw-r--r--Documentation/devicetree/bindings/sound/ak4642.yaml59
-rw-r--r--Documentation/devicetree/bindings/sound/alc5623.txt25
-rw-r--r--Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-codec.yaml90
-rw-r--r--Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-i2s.yaml4
-rw-r--r--Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-spdif.yaml49
-rw-r--r--Documentation/devicetree/bindings/sound/amlogic,axg-sound-card.yaml7
-rw-r--r--Documentation/devicetree/bindings/sound/amlogic,g12a-tohdmitx.txt58
-rw-r--r--Documentation/devicetree/bindings/sound/amlogic,g12a-tohdmitx.yaml54
-rw-r--r--Documentation/devicetree/bindings/sound/amlogic,gx-sound-card.yaml8
-rw-r--r--Documentation/devicetree/bindings/sound/apple,mca.yaml17
-rw-r--r--Documentation/devicetree/bindings/sound/asahi-kasei,ak4104.yaml49
-rw-r--r--Documentation/devicetree/bindings/sound/asahi-kasei,ak4375.yaml60
-rw-r--r--Documentation/devicetree/bindings/sound/asahi-kasei,ak4458.yaml4
-rw-r--r--Documentation/devicetree/bindings/sound/asahi-kasei,ak4554.yaml27
-rw-r--r--Documentation/devicetree/bindings/sound/asahi-kasei,ak4613.yaml59
-rw-r--r--Documentation/devicetree/bindings/sound/asahi-kasei,ak4619.yaml62
-rw-r--r--Documentation/devicetree/bindings/sound/asahi-kasei,ak4642.yaml59
-rw-r--r--Documentation/devicetree/bindings/sound/atmel,asoc-wm8904.yaml84
-rw-r--r--Documentation/devicetree/bindings/sound/atmel,at91-ssc.yaml109
-rw-r--r--Documentation/devicetree/bindings/sound/atmel,at91sam9g20ek-wm8731.yaml72
-rw-r--r--Documentation/devicetree/bindings/sound/atmel,sam9x5-wm8731-audio.yaml76
-rw-r--r--Documentation/devicetree/bindings/sound/atmel,sama5d2-classd.yaml7
-rw-r--r--Documentation/devicetree/bindings/sound/atmel-at91sam9g20ek-wm8731-audio.txt26
-rw-r--r--Documentation/devicetree/bindings/sound/atmel-sam9x5-wm8731-audio.txt35
-rw-r--r--Documentation/devicetree/bindings/sound/atmel-wm8904.txt55
-rw-r--r--Documentation/devicetree/bindings/sound/audio-graph-card2.yaml15
-rw-r--r--Documentation/devicetree/bindings/sound/audio-graph-port.yaml11
-rw-r--r--Documentation/devicetree/bindings/sound/audio-graph.yaml6
-rw-r--r--Documentation/devicetree/bindings/sound/awinic,aw88395.yaml7
-rw-r--r--Documentation/devicetree/bindings/sound/brcm,bcm2835-i2s.txt24
-rw-r--r--Documentation/devicetree/bindings/sound/brcm,bcm2835-i2s.yaml51
-rw-r--r--Documentation/devicetree/bindings/sound/cirrus,cs35l41.yaml6
-rw-r--r--Documentation/devicetree/bindings/sound/cirrus,cs35l45.yaml3
-rw-r--r--Documentation/devicetree/bindings/sound/cirrus,cs4270.yaml59
-rw-r--r--Documentation/devicetree/bindings/sound/cirrus,cs4271.yaml111
-rw-r--r--Documentation/devicetree/bindings/sound/cirrus,cs42l43.yaml11
-rw-r--r--Documentation/devicetree/bindings/sound/cirrus,cs42l84.yaml56
-rw-r--r--Documentation/devicetree/bindings/sound/cirrus,cs42xx8.yaml85
-rw-r--r--Documentation/devicetree/bindings/sound/cirrus,cs48l32.yaml195
-rw-r--r--Documentation/devicetree/bindings/sound/cirrus,cs530x.yaml93
-rw-r--r--Documentation/devicetree/bindings/sound/cirrus,ep9301-i2s.yaml16
-rw-r--r--Documentation/devicetree/bindings/sound/cs4265.txt29
-rw-r--r--Documentation/devicetree/bindings/sound/cs4270.txt21
-rw-r--r--Documentation/devicetree/bindings/sound/cs4271.txt57
-rw-r--r--Documentation/devicetree/bindings/sound/cs42xx8.txt34
-rw-r--r--Documentation/devicetree/bindings/sound/cs4341.txt22
-rw-r--r--Documentation/devicetree/bindings/sound/cs4349.txt19
-rw-r--r--Documentation/devicetree/bindings/sound/da7213.txt45
-rw-r--r--Documentation/devicetree/bindings/sound/da9055.txt22
-rw-r--r--Documentation/devicetree/bindings/sound/davinci-mcasp-audio.yaml18
-rw-r--r--Documentation/devicetree/bindings/sound/davinci-mcbsp.txt50
-rw-r--r--Documentation/devicetree/bindings/sound/davinci-mcbsp.yaml113
-rw-r--r--Documentation/devicetree/bindings/sound/dlg,da7213.yaml103
-rw-r--r--Documentation/devicetree/bindings/sound/dmic-codec.yaml3
-rw-r--r--Documentation/devicetree/bindings/sound/everest,es7134.txt15
-rw-r--r--Documentation/devicetree/bindings/sound/everest,es71x4.yaml62
-rw-r--r--Documentation/devicetree/bindings/sound/everest,es7241.txt28
-rw-r--r--Documentation/devicetree/bindings/sound/everest,es7241.yaml68
-rw-r--r--Documentation/devicetree/bindings/sound/everest,es8316.yaml25
-rw-r--r--Documentation/devicetree/bindings/sound/everest,es8326.yaml14
-rw-r--r--Documentation/devicetree/bindings/sound/everest,es8328.yaml15
-rw-r--r--Documentation/devicetree/bindings/sound/everest,es8375.yaml71
-rw-r--r--Documentation/devicetree/bindings/sound/everest,es8389.yaml50
-rw-r--r--Documentation/devicetree/bindings/sound/foursemi,fs2105s.yaml101
-rw-r--r--Documentation/devicetree/bindings/sound/fsl,asrc.txt80
-rw-r--r--Documentation/devicetree/bindings/sound/fsl,audmix.txt50
-rw-r--r--Documentation/devicetree/bindings/sound/fsl,audmix.yaml142
-rw-r--r--Documentation/devicetree/bindings/sound/fsl,easrc.yaml41
-rw-r--r--Documentation/devicetree/bindings/sound/fsl,esai.txt68
-rw-r--r--Documentation/devicetree/bindings/sound/fsl,esai.yaml136
-rw-r--r--Documentation/devicetree/bindings/sound/fsl,imx-asrc.yaml189
-rw-r--r--Documentation/devicetree/bindings/sound/fsl,imx-audio-es8328.yaml111
-rw-r--r--Documentation/devicetree/bindings/sound/fsl,imx95-cm7-sof.yaml64
-rw-r--r--Documentation/devicetree/bindings/sound/fsl,micfil.yaml15
-rw-r--r--Documentation/devicetree/bindings/sound/fsl,mqs.yaml14
-rw-r--r--Documentation/devicetree/bindings/sound/fsl,mxs-audio-sgtl5000.yaml81
-rw-r--r--Documentation/devicetree/bindings/sound/fsl,qmc-audio.yaml41
-rw-r--r--Documentation/devicetree/bindings/sound/fsl,rpmsg.yaml1
-rw-r--r--Documentation/devicetree/bindings/sound/fsl,sai.yaml67
-rw-r--r--Documentation/devicetree/bindings/sound/fsl,saif.yaml83
-rw-r--r--Documentation/devicetree/bindings/sound/fsl,sgtl5000.yaml113
-rw-r--r--Documentation/devicetree/bindings/sound/fsl,sof-cpu.yaml27
-rw-r--r--Documentation/devicetree/bindings/sound/fsl,spdif.yaml62
-rw-r--r--Documentation/devicetree/bindings/sound/fsl,ssi.txt87
-rw-r--r--Documentation/devicetree/bindings/sound/fsl,ssi.yaml194
-rw-r--r--Documentation/devicetree/bindings/sound/fsl,xcvr.yaml71
-rw-r--r--Documentation/devicetree/bindings/sound/fsl-asoc-card.txt117
-rw-r--r--Documentation/devicetree/bindings/sound/fsl-asoc-card.yaml243
-rw-r--r--Documentation/devicetree/bindings/sound/google,cros-ec-codec.yaml2
-rw-r--r--Documentation/devicetree/bindings/sound/google,sc7280-herobrine.yaml1
-rw-r--r--Documentation/devicetree/bindings/sound/ics43432.txt19
-rw-r--r--Documentation/devicetree/bindings/sound/imx-audio-card.yaml14
-rw-r--r--Documentation/devicetree/bindings/sound/imx-audio-es8328.txt60
-rw-r--r--Documentation/devicetree/bindings/sound/imx-audio-sgtl5000.txt56
-rw-r--r--Documentation/devicetree/bindings/sound/imx-audio-spdif.txt36
-rw-r--r--Documentation/devicetree/bindings/sound/infineon,peb2466.yaml2
-rw-r--r--Documentation/devicetree/bindings/sound/inno-rk3036.txt20
-rw-r--r--Documentation/devicetree/bindings/sound/intel,keembay-i2s.yaml32
-rw-r--r--Documentation/devicetree/bindings/sound/invensense,ics43432.yaml51
-rw-r--r--Documentation/devicetree/bindings/sound/irondevice,sma1307.yaml53
-rw-r--r--Documentation/devicetree/bindings/sound/linux,spdif-dit.yaml37
-rw-r--r--Documentation/devicetree/bindings/sound/linux,spdif.yaml42
-rw-r--r--Documentation/devicetree/bindings/sound/loongson,ls1b-ac97.yaml68
-rw-r--r--Documentation/devicetree/bindings/sound/loongson,ls2k1000-i2s.yaml68
-rw-r--r--Documentation/devicetree/bindings/sound/maxim,max98088.txt23
-rw-r--r--Documentation/devicetree/bindings/sound/maxim,max98088.yaml47
-rw-r--r--Documentation/devicetree/bindings/sound/maxim,max98090.yaml2
-rw-r--r--Documentation/devicetree/bindings/sound/maxim,max98095.yaml2
-rw-r--r--Documentation/devicetree/bindings/sound/maxim,max98390.yaml8
-rw-r--r--Documentation/devicetree/bindings/sound/maxim,max98504.yaml2
-rw-r--r--Documentation/devicetree/bindings/sound/maxim,max98925.yaml2
-rw-r--r--Documentation/devicetree/bindings/sound/mediatek,mt2701-wm8960.yaml54
-rw-r--r--Documentation/devicetree/bindings/sound/mediatek,mt8173-afe-pcm.yaml98
-rw-r--r--Documentation/devicetree/bindings/sound/mediatek,mt8183-audio.yaml228
-rw-r--r--Documentation/devicetree/bindings/sound/mediatek,mt8183_da7219.yaml49
-rw-r--r--Documentation/devicetree/bindings/sound/mediatek,mt8183_mt6358_ts3a227.yaml59
-rw-r--r--Documentation/devicetree/bindings/sound/mediatek,mt8188-mt6359.yaml39
-rw-r--r--Documentation/devicetree/bindings/sound/mediatek,mt8189-afe-pcm.yaml178
-rw-r--r--Documentation/devicetree/bindings/sound/mediatek,mt8189-nau8825.yaml101
-rw-r--r--Documentation/devicetree/bindings/sound/mediatek,mt8365-afe.yaml130
-rw-r--r--Documentation/devicetree/bindings/sound/mediatek,mt8365-mt6357.yaml107
-rw-r--r--Documentation/devicetree/bindings/sound/microchip,sama7g5-i2smcc.yaml11
-rw-r--r--Documentation/devicetree/bindings/sound/microchip,sama7g5-spdifrx.yaml5
-rw-r--r--Documentation/devicetree/bindings/sound/mscc,zl38060.yaml72
-rw-r--r--Documentation/devicetree/bindings/sound/mt2701-wm8960.txt24
-rw-r--r--Documentation/devicetree/bindings/sound/mt6359.yaml10
-rw-r--r--Documentation/devicetree/bindings/sound/mt8183-afe-pcm.txt42
-rw-r--r--Documentation/devicetree/bindings/sound/mt8183-da7219-max98357.txt21
-rw-r--r--Documentation/devicetree/bindings/sound/mt8183-mt6358-ts3a227-max98357.txt25
-rw-r--r--Documentation/devicetree/bindings/sound/mt8186-afe-pcm.yaml5
-rw-r--r--Documentation/devicetree/bindings/sound/mt8186-mt6366-da7219-max98357.yaml130
-rw-r--r--Documentation/devicetree/bindings/sound/mt8186-mt6366-rt1019-rt5682s.yaml120
-rw-r--r--Documentation/devicetree/bindings/sound/mt8192-afe-pcm.yaml5
-rw-r--r--Documentation/devicetree/bindings/sound/mt8192-mt6359-rt1015-rt5682.yaml139
-rw-r--r--Documentation/devicetree/bindings/sound/mt8195-mt6359.yaml138
-rw-r--r--Documentation/devicetree/bindings/sound/mtk-afe-pcm.txt45
-rw-r--r--Documentation/devicetree/bindings/sound/mxs-audio-sgtl5000.txt42
-rw-r--r--Documentation/devicetree/bindings/sound/mxs-saif.txt41
-rw-r--r--Documentation/devicetree/bindings/sound/neofidelity,ntp8835.yaml73
-rw-r--r--Documentation/devicetree/bindings/sound/neofidelity,ntp8918.yaml72
-rw-r--r--Documentation/devicetree/bindings/sound/nuvoton,nau8325.yaml80
-rw-r--r--Documentation/devicetree/bindings/sound/nuvoton,nau8540.yaml40
-rw-r--r--Documentation/devicetree/bindings/sound/nuvoton,nau8810.yaml45
-rw-r--r--Documentation/devicetree/bindings/sound/nuvoton,nau8821.yaml7
-rw-r--r--Documentation/devicetree/bindings/sound/nuvoton,nau8824.yaml8
-rw-r--r--Documentation/devicetree/bindings/sound/nuvoton,nau8825.yaml14
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra-audio-graph-card.yaml1
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra-audio-max9808x.yaml2
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra186-asrc.yaml4
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra186-dspk.yaml1
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra20-ac97.txt36
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra20-ac97.yaml82
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra20-das.txt12
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra20-das.yaml36
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra210-admaif.yaml117
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra210-adx.yaml4
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra210-ahub.yaml1
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra210-amx.yaml6
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra210-dmic.yaml1
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra210-i2s.yaml4
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra210-mbdrc.yaml1
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra210-mixer.yaml1
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra210-mvc.yaml1
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra210-ope.yaml1
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra210-peq.yaml1
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra210-sfc.yaml1
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra30-hda.yaml109
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra30-i2s.txt27
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra30-i2s.yaml67
-rw-r--r--Documentation/devicetree/bindings/sound/nxp,lpc3220-i2s.yaml73
-rw-r--r--Documentation/devicetree/bindings/sound/nxp,tfa9879.yaml44
-rw-r--r--Documentation/devicetree/bindings/sound/omap-mcpdm.txt30
-rw-r--r--Documentation/devicetree/bindings/sound/omap-twl4030.txt62
-rw-r--r--Documentation/devicetree/bindings/sound/pcm1789.txt22
-rw-r--r--Documentation/devicetree/bindings/sound/pcm179x.txt27
-rw-r--r--Documentation/devicetree/bindings/sound/pcm186x.txt42
-rw-r--r--Documentation/devicetree/bindings/sound/pcm5102a.txt13
-rw-r--r--Documentation/devicetree/bindings/sound/pcm512x.txt53
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,apq8016-sbc-sndcard.yaml205
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,apq8096.txt128
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,lpass-rx-macro.yaml20
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,lpass-tx-macro.yaml2
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,lpass-va-macro.yaml37
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,lpass-wsa-macro.yaml25
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,msm8916-wcd-digital-codec.yaml55
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,msm8916-wcd-digital.txt20
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,pm4125-codec.yaml134
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,pm4125-sdw.yaml79
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,q6adm-routing.yaml2
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,q6adm.yaml2
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,q6afe.yaml15
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,q6apm-lpass-dais.yaml2
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,q6apm.yaml2
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,q6asm-dais.yaml2
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,q6asm.yaml2
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,q6core.yaml2
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,q6prm.yaml2
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,q6usb.yaml55
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,sm8250.yaml146
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,wcd934x.yaml5
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,wcd937x-sdw.yaml127
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,wcd937x.yaml82
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,wcd938x.yaml86
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,wcd939x-sdw.yaml69
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,wcd939x.yaml99
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,wcd93xx-common.yaml95
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,wsa883x.yaml19
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,wsa8840.yaml21
-rw-r--r--Documentation/devicetree/bindings/sound/realtek,alc203.yaml36
-rw-r--r--Documentation/devicetree/bindings/sound/realtek,alc5623.yaml54
-rw-r--r--Documentation/devicetree/bindings/sound/realtek,rt1015.yaml41
-rw-r--r--Documentation/devicetree/bindings/sound/realtek,rt1019.yaml35
-rw-r--r--Documentation/devicetree/bindings/sound/realtek,rt5514.yaml70
-rw-r--r--Documentation/devicetree/bindings/sound/realtek,rt5616.yaml12
-rw-r--r--Documentation/devicetree/bindings/sound/realtek,rt5631.yaml67
-rw-r--r--Documentation/devicetree/bindings/sound/realtek,rt5640.yaml146
-rw-r--r--Documentation/devicetree/bindings/sound/realtek,rt5645.yaml131
-rw-r--r--Documentation/devicetree/bindings/sound/realtek,rt5659.yaml129
-rw-r--r--Documentation/devicetree/bindings/sound/realtek,rt5677.yaml135
-rw-r--r--Documentation/devicetree/bindings/sound/realtek,rt5682.yaml156
-rw-r--r--Documentation/devicetree/bindings/sound/renesas,rsnd.txt2
-rw-r--r--Documentation/devicetree/bindings/sound/renesas,rsnd.yaml7
-rw-r--r--Documentation/devicetree/bindings/sound/renesas,rz-ssi.yaml23
-rw-r--r--Documentation/devicetree/bindings/sound/richtek,rt9123.yaml63
-rw-r--r--Documentation/devicetree/bindings/sound/richtek,rt9123p.yaml48
-rw-r--r--Documentation/devicetree/bindings/sound/rockchip,i2s-tdm.yaml1
-rw-r--r--Documentation/devicetree/bindings/sound/rockchip,rk3036-codec.yaml58
-rw-r--r--Documentation/devicetree/bindings/sound/rockchip,rk3308-codec.yaml102
-rw-r--r--Documentation/devicetree/bindings/sound/rockchip,rk3328-codec.yaml2
-rw-r--r--Documentation/devicetree/bindings/sound/rockchip,rk3576-sai.yaml144
-rw-r--r--Documentation/devicetree/bindings/sound/rockchip-spdif.yaml4
-rw-r--r--Documentation/devicetree/bindings/sound/rt1015.txt23
-rw-r--r--Documentation/devicetree/bindings/sound/rt1019.yaml35
-rw-r--r--Documentation/devicetree/bindings/sound/rt5514.txt37
-rw-r--r--Documentation/devicetree/bindings/sound/rt5631.txt48
-rw-r--r--Documentation/devicetree/bindings/sound/rt5640.txt97
-rw-r--r--Documentation/devicetree/bindings/sound/rt5645.txt76
-rw-r--r--Documentation/devicetree/bindings/sound/rt5659.txt89
-rw-r--r--Documentation/devicetree/bindings/sound/rt5677.txt78
-rw-r--r--Documentation/devicetree/bindings/sound/rt5682.txt98
-rw-r--r--Documentation/devicetree/bindings/sound/samsung,midas-audio.yaml33
-rw-r--r--Documentation/devicetree/bindings/sound/samsung,odroid.yaml5
-rw-r--r--Documentation/devicetree/bindings/sound/samsung,tm2.yaml6
-rw-r--r--Documentation/devicetree/bindings/sound/serial-midi.yaml3
-rw-r--r--Documentation/devicetree/bindings/sound/sgtl5000.yaml113
-rw-r--r--Documentation/devicetree/bindings/sound/simple-audio-mux.yaml11
-rw-r--r--Documentation/devicetree/bindings/sound/simple-card.yaml12
-rw-r--r--Documentation/devicetree/bindings/sound/spacemit,k1-i2s.yaml87
-rw-r--r--Documentation/devicetree/bindings/sound/spdif-receiver.txt10
-rw-r--r--Documentation/devicetree/bindings/sound/sprd,pcm-platform.yaml56
-rw-r--r--Documentation/devicetree/bindings/sound/sprd,sc9860-mcdt.yaml47
-rw-r--r--Documentation/devicetree/bindings/sound/sprd-mcdt.txt19
-rw-r--r--Documentation/devicetree/bindings/sound/sprd-pcm.txt23
-rw-r--r--Documentation/devicetree/bindings/sound/st,sta350.txt2
-rw-r--r--Documentation/devicetree/bindings/sound/st,stm32-i2s.yaml40
-rw-r--r--Documentation/devicetree/bindings/sound/st,stm32-sai.yaml32
-rw-r--r--Documentation/devicetree/bindings/sound/st,stm32-spdifrx.yaml8
-rw-r--r--Documentation/devicetree/bindings/sound/tas2562.yaml81
-rw-r--r--Documentation/devicetree/bindings/sound/tas2770.yaml87
-rw-r--r--Documentation/devicetree/bindings/sound/tas27xx.yaml82
-rw-r--r--Documentation/devicetree/bindings/sound/tas571x.txt49
-rw-r--r--Documentation/devicetree/bindings/sound/tas5805m.yaml57
-rw-r--r--Documentation/devicetree/bindings/sound/ti,omap-twl4030.yaml98
-rw-r--r--Documentation/devicetree/bindings/sound/ti,omap4-mcpdm.yaml73
-rw-r--r--Documentation/devicetree/bindings/sound/ti,pcm1681.txt15
-rw-r--r--Documentation/devicetree/bindings/sound/ti,pcm1681.yaml43
-rw-r--r--Documentation/devicetree/bindings/sound/ti,pcm1754.yaml55
-rw-r--r--Documentation/devicetree/bindings/sound/ti,pcm1862.yaml76
-rw-r--r--Documentation/devicetree/bindings/sound/ti,pcm512x.yaml101
-rw-r--r--Documentation/devicetree/bindings/sound/ti,pcm6240.yaml179
-rw-r--r--Documentation/devicetree/bindings/sound/ti,tas2562.yaml83
-rw-r--r--Documentation/devicetree/bindings/sound/ti,tas2770.yaml90
-rw-r--r--Documentation/devicetree/bindings/sound/ti,tas2781.yaml215
-rw-r--r--Documentation/devicetree/bindings/sound/ti,tas27xx.yaml85
-rw-r--r--Documentation/devicetree/bindings/sound/ti,tas57xx.yaml137
-rw-r--r--Documentation/devicetree/bindings/sound/ti,tas5805m.yaml57
-rw-r--r--Documentation/devicetree/bindings/sound/ti,tlv320adc3xxx.yaml32
-rw-r--r--Documentation/devicetree/bindings/sound/ti,tlv320adcx140.yaml209
-rw-r--r--Documentation/devicetree/bindings/sound/ti,tlv320dac3100.yaml126
-rw-r--r--Documentation/devicetree/bindings/sound/ti,tpa6130a2.yaml55
-rw-r--r--Documentation/devicetree/bindings/sound/ti,twl4030-audio.yaml90
-rw-r--r--Documentation/devicetree/bindings/sound/tlv320adcx140.yaml209
-rw-r--r--Documentation/devicetree/bindings/sound/tlv320aic31xx.txt77
-rw-r--r--Documentation/devicetree/bindings/sound/tpa6130a2.txt27
-rw-r--r--Documentation/devicetree/bindings/sound/trivial-codec.yaml79
-rw-r--r--Documentation/devicetree/bindings/sound/wlf,wm8510.yaml41
-rw-r--r--Documentation/devicetree/bindings/sound/wlf,wm8523.yaml40
-rw-r--r--Documentation/devicetree/bindings/sound/wlf,wm8580.yaml42
-rw-r--r--Documentation/devicetree/bindings/sound/wlf,wm8711.yaml40
-rw-r--r--Documentation/devicetree/bindings/sound/wlf,wm8728.yaml40
-rw-r--r--Documentation/devicetree/bindings/sound/wlf,wm8737.yaml40
-rw-r--r--Documentation/devicetree/bindings/sound/wlf,wm8753.yaml62
-rw-r--r--Documentation/devicetree/bindings/sound/wlf,wm8782.yaml47
-rw-r--r--Documentation/devicetree/bindings/sound/wlf,wm8804.yaml58
-rw-r--r--Documentation/devicetree/bindings/sound/wlf,wm8903.yaml1
-rw-r--r--Documentation/devicetree/bindings/sound/wlf,wm8904.yaml129
-rw-r--r--Documentation/devicetree/bindings/sound/wlf,wm8960.yaml26
-rw-r--r--Documentation/devicetree/bindings/sound/wlf,wm8961.yaml43
-rw-r--r--Documentation/devicetree/bindings/sound/wlf,wm8974.txt15
-rw-r--r--Documentation/devicetree/bindings/sound/wlf,wm8994.yaml2
-rw-r--r--Documentation/devicetree/bindings/sound/wm8750.yaml42
-rw-r--r--Documentation/devicetree/bindings/sound/wm8770.txt16
-rw-r--r--Documentation/devicetree/bindings/sound/wm8776.txt18
-rw-r--r--Documentation/devicetree/bindings/sound/wm8782.txt24
-rw-r--r--Documentation/devicetree/bindings/sound/wm8804.txt25
-rw-r--r--Documentation/devicetree/bindings/sound/xlnx,audio-formatter.txt29
-rw-r--r--Documentation/devicetree/bindings/sound/xlnx,audio-formatter.yaml72
-rw-r--r--Documentation/devicetree/bindings/sound/xlnx,i2s.txt28
-rw-r--r--Documentation/devicetree/bindings/sound/xlnx,i2s.yaml65
-rw-r--r--Documentation/devicetree/bindings/sound/xlnx,spdif.txt28
-rw-r--r--Documentation/devicetree/bindings/sound/xlnx,spdif.yaml77
-rw-r--r--Documentation/devicetree/bindings/sound/xmos,xvf3500.yaml63
-rw-r--r--Documentation/devicetree/bindings/sound/zl38060.yaml72
-rw-r--r--Documentation/devicetree/bindings/soundwire/qcom,soundwire.yaml19
-rw-r--r--Documentation/devicetree/bindings/spi/adi,axi-spi-engine.yaml24
-rw-r--r--Documentation/devicetree/bindings/spi/airoha,en7581-snand.yaml70
-rw-r--r--Documentation/devicetree/bindings/spi/amlogic,a1-spifc.yaml3
-rw-r--r--Documentation/devicetree/bindings/spi/amlogic,a4-spifc.yaml82
-rw-r--r--Documentation/devicetree/bindings/spi/amlogic,a4-spisg.yaml59
-rw-r--r--Documentation/devicetree/bindings/spi/apple,spi.yaml66
-rw-r--r--Documentation/devicetree/bindings/spi/aspeed,ast2600-fmc.yaml4
-rw-r--r--Documentation/devicetree/bindings/spi/atmel,at91rm9200-spi.yaml20
-rw-r--r--Documentation/devicetree/bindings/spi/atmel,quadspi.yaml3
-rw-r--r--Documentation/devicetree/bindings/spi/brcm,bcm2835-aux-spi.txt38
-rw-r--r--Documentation/devicetree/bindings/spi/brcm,bcm2835-aux-spi.yaml53
-rw-r--r--Documentation/devicetree/bindings/spi/brcm,bcm2835-spi.txt23
-rw-r--r--Documentation/devicetree/bindings/spi/brcm,bcm2835-spi.yaml50
-rw-r--r--Documentation/devicetree/bindings/spi/cdns,qspi-nor.yaml34
-rw-r--r--Documentation/devicetree/bindings/spi/cdns,xspi.yaml32
-rw-r--r--Documentation/devicetree/bindings/spi/cirrus,ep9301-spi.yaml70
-rw-r--r--Documentation/devicetree/bindings/spi/fsl,dspi-peripheral-props.yaml30
-rw-r--r--Documentation/devicetree/bindings/spi/fsl,dspi.yaml134
-rw-r--r--Documentation/devicetree/bindings/spi/fsl,espi.yaml65
-rw-r--r--Documentation/devicetree/bindings/spi/fsl,spi-fsl-qspi.yaml21
-rw-r--r--Documentation/devicetree/bindings/spi/fsl,spi.yaml74
-rw-r--r--Documentation/devicetree/bindings/spi/fsl-spi.txt62
-rw-r--r--Documentation/devicetree/bindings/spi/ibm,spi-fsi.yaml55
-rw-r--r--Documentation/devicetree/bindings/spi/marvell,armada-3700-spi.yaml55
-rw-r--r--Documentation/devicetree/bindings/spi/marvell,mmp2-ssp.yaml35
-rw-r--r--Documentation/devicetree/bindings/spi/marvell,orion-spi.yaml102
-rw-r--r--Documentation/devicetree/bindings/spi/mediatek,spi-mt65xx.yaml8
-rw-r--r--Documentation/devicetree/bindings/spi/microchip,mpfs-spi.yaml106
-rw-r--r--Documentation/devicetree/bindings/spi/mxs-spi.yaml3
-rw-r--r--Documentation/devicetree/bindings/spi/nuvoton,npcm-pspi.txt36
-rw-r--r--Documentation/devicetree/bindings/spi/nuvoton,npcm-pspi.yaml72
-rw-r--r--Documentation/devicetree/bindings/spi/nuvoton,wpcm450-fiu.yaml5
-rw-r--r--Documentation/devicetree/bindings/spi/nvidia,tegra210-quad.yaml18
-rw-r--r--Documentation/devicetree/bindings/spi/nxp,lpc3220-spi.yaml44
-rw-r--r--Documentation/devicetree/bindings/spi/nxp,sc18is.yaml51
-rw-r--r--Documentation/devicetree/bindings/spi/qcom,spi-geni-qcom.yaml3
-rw-r--r--Documentation/devicetree/bindings/spi/qcom,spi-qpic-snand.yaml89
-rw-r--r--Documentation/devicetree/bindings/spi/qcom,spi-qup.yaml2
-rw-r--r--Documentation/devicetree/bindings/spi/realtek,rtl9301-snand.yaml62
-rw-r--r--Documentation/devicetree/bindings/spi/renesas,rzv2h-rspi.yaml141
-rw-r--r--Documentation/devicetree/bindings/spi/renesas,sh-msiof.yaml44
-rw-r--r--Documentation/devicetree/bindings/spi/samsung,spi.yaml10
-rw-r--r--Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml21
-rw-r--r--Documentation/devicetree/bindings/spi/spi-armada-3700.txt25
-rw-r--r--Documentation/devicetree/bindings/spi/spi-cadence.yaml19
-rw-r--r--Documentation/devicetree/bindings/spi/spi-controller.yaml54
-rw-r--r--Documentation/devicetree/bindings/spi/spi-fsl-dspi.txt65
-rw-r--r--Documentation/devicetree/bindings/spi/spi-fsl-lpspi.yaml11
-rw-r--r--Documentation/devicetree/bindings/spi/spi-mux.yaml1
-rw-r--r--Documentation/devicetree/bindings/spi/spi-nxp-fspi.yaml19
-rw-r--r--Documentation/devicetree/bindings/spi/spi-orion.txt79
-rw-r--r--Documentation/devicetree/bindings/spi/spi-peripheral-props.yaml15
-rw-r--r--Documentation/devicetree/bindings/spi/spi-rockchip.yaml4
-rw-r--r--Documentation/devicetree/bindings/spi/spi-sc18is602.txt23
-rw-r--r--Documentation/devicetree/bindings/spi/spi-sg2044-nor.yaml54
-rw-r--r--Documentation/devicetree/bindings/spi/spi-sprd.txt33
-rw-r--r--Documentation/devicetree/bindings/spi/spi-zynqmp-qspi.yaml25
-rw-r--r--Documentation/devicetree/bindings/spi/sprd,sc9860-spi.yaml72
-rw-r--r--Documentation/devicetree/bindings/spi/st,stm32-qspi.yaml4
-rw-r--r--Documentation/devicetree/bindings/spi/st,stm32-spi.yaml52
-rw-r--r--Documentation/devicetree/bindings/spi/st,stm32mp25-ospi.yaml106
-rw-r--r--Documentation/devicetree/bindings/spi/ti,qspi.yaml96
-rw-r--r--Documentation/devicetree/bindings/spi/ti_qspi.txt53
-rw-r--r--Documentation/devicetree/bindings/spmi/apple,spmi.yaml57
-rw-r--r--Documentation/devicetree/bindings/spmi/hisilicon,hisi-spmi-controller.yaml17
-rw-r--r--Documentation/devicetree/bindings/spmi/mtk,spmi-mtk-pmif.yaml1
-rw-r--r--Documentation/devicetree/bindings/spmi/qcom,spmi-pmic-arb.yaml1
-rw-r--r--Documentation/devicetree/bindings/spmi/qcom,x1e80100-spmi-pmic-arb.yaml140
-rw-r--r--Documentation/devicetree/bindings/sram/allwinner,sun4i-a10-system-control.yaml35
-rw-r--r--Documentation/devicetree/bindings/sram/qcom,imem.yaml19
-rw-r--r--Documentation/devicetree/bindings/sram/sram.yaml6
-rw-r--r--Documentation/devicetree/bindings/staging/iio/adc/spear-adc.txt24
-rw-r--r--Documentation/devicetree/bindings/submitting-patches.rst44
-rw-r--r--Documentation/devicetree/bindings/thermal/airoha,en7581-thermal.yaml48
-rw-r--r--Documentation/devicetree/bindings/thermal/allwinner,sun8i-a83t-ths.yaml88
-rw-r--r--Documentation/devicetree/bindings/thermal/amazon,al-thermal.txt33
-rw-r--r--Documentation/devicetree/bindings/thermal/amazon,al-thermal.yaml50
-rw-r--r--Documentation/devicetree/bindings/thermal/amlogic,thermal.yaml37
-rw-r--r--Documentation/devicetree/bindings/thermal/armada-thermal.txt42
-rw-r--r--Documentation/devicetree/bindings/thermal/brcm,avs-ro-thermal.yaml24
-rw-r--r--Documentation/devicetree/bindings/thermal/brcm,avs-tmon.yaml18
-rw-r--r--Documentation/devicetree/bindings/thermal/brcm,bcm2835-thermal.yaml1
-rw-r--r--Documentation/devicetree/bindings/thermal/brcm,sr-thermal.txt105
-rw-r--r--Documentation/devicetree/bindings/thermal/brcm,sr-thermal.yaml121
-rw-r--r--Documentation/devicetree/bindings/thermal/da9062-thermal.txt36
-rw-r--r--Documentation/devicetree/bindings/thermal/db8500-thermal.txt44
-rw-r--r--Documentation/devicetree/bindings/thermal/dlg,da9062-thermal.yaml35
-rw-r--r--Documentation/devicetree/bindings/thermal/fsl,imx91-tmu.yaml87
-rw-r--r--Documentation/devicetree/bindings/thermal/fsl,scu-thermal.yaml1
-rw-r--r--Documentation/devicetree/bindings/thermal/generic-adc-thermal.yaml5
-rw-r--r--Documentation/devicetree/bindings/thermal/hisilicon,tsensor.yaml57
-rw-r--r--Documentation/devicetree/bindings/thermal/hisilicon-thermal.txt32
-rw-r--r--Documentation/devicetree/bindings/thermal/imx-thermal.yaml41
-rw-r--r--Documentation/devicetree/bindings/thermal/imx8mm-thermal.yaml17
-rw-r--r--Documentation/devicetree/bindings/thermal/loongson,ls2k-thermal.yaml25
-rw-r--r--Documentation/devicetree/bindings/thermal/marvell,armada-ap806-thermal.yaml46
-rw-r--r--Documentation/devicetree/bindings/thermal/marvell,armada370-thermal.yaml37
-rw-r--r--Documentation/devicetree/bindings/thermal/mediatek,lvts-thermal.yaml7
-rw-r--r--Documentation/devicetree/bindings/thermal/mediatek,thermal.yaml27
-rw-r--r--Documentation/devicetree/bindings/thermal/nvidia,tegra124-soctherm.yaml5
-rw-r--r--Documentation/devicetree/bindings/thermal/nvidia,tegra186-bpmp-thermal.yaml12
-rw-r--r--Documentation/devicetree/bindings/thermal/nvidia,tegra30-tsensor.yaml9
-rw-r--r--Documentation/devicetree/bindings/thermal/qcom,spmi-temp-alarm.yaml1
-rw-r--r--Documentation/devicetree/bindings/thermal/qcom-lmh.yaml12
-rw-r--r--Documentation/devicetree/bindings/thermal/qcom-spmi-adc-tm-hc.yaml8
-rw-r--r--Documentation/devicetree/bindings/thermal/qcom-spmi-adc-tm5.yaml8
-rw-r--r--Documentation/devicetree/bindings/thermal/qcom-tsens.yaml133
-rw-r--r--Documentation/devicetree/bindings/thermal/qoriq-thermal.yaml12
-rw-r--r--Documentation/devicetree/bindings/thermal/rcar-gen3-thermal.yaml73
-rw-r--r--Documentation/devicetree/bindings/thermal/rcar-thermal.yaml64
-rw-r--r--Documentation/devicetree/bindings/thermal/renesas,r9a08g045-tsu.yaml93
-rw-r--r--Documentation/devicetree/bindings/thermal/renesas,r9a09g047-tsu.yaml91
-rw-r--r--Documentation/devicetree/bindings/thermal/rockchip-thermal.yaml82
-rw-r--r--Documentation/devicetree/bindings/thermal/rzg2l-thermal.yaml43
-rw-r--r--Documentation/devicetree/bindings/thermal/samsung,exynos-thermal.yaml3
-rw-r--r--Documentation/devicetree/bindings/thermal/socionext,uniphier-thermal.yaml5
-rw-r--r--Documentation/devicetree/bindings/thermal/sprd-thermal.yaml49
-rw-r--r--Documentation/devicetree/bindings/thermal/st,stih407-thermal.yaml58
-rw-r--r--Documentation/devicetree/bindings/thermal/st,stm32-thermal.yaml5
-rw-r--r--Documentation/devicetree/bindings/thermal/st-thermal.txt32
-rw-r--r--Documentation/devicetree/bindings/thermal/thermal-zones.yaml13
-rw-r--r--Documentation/devicetree/bindings/thermal/ti,am654-thermal.yaml15
-rw-r--r--Documentation/devicetree/bindings/thermal/ti,j72xx-thermal.yaml5
-rw-r--r--Documentation/devicetree/bindings/timer/actions,owl-timer.txt21
-rw-r--r--Documentation/devicetree/bindings/timer/actions,owl-timer.yaml107
-rw-r--r--Documentation/devicetree/bindings/timer/altr,timer-1.0.txt18
-rw-r--r--Documentation/devicetree/bindings/timer/altr,timer-1.0.yaml39
-rw-r--r--Documentation/devicetree/bindings/timer/andestech,plmt0.yaml53
-rw-r--r--Documentation/devicetree/bindings/timer/arm,arch_timer_mmio.yaml2
-rw-r--r--Documentation/devicetree/bindings/timer/arm,mps2-timer.txt28
-rw-r--r--Documentation/devicetree/bindings/timer/arm,mps2-timer.yaml49
-rw-r--r--Documentation/devicetree/bindings/timer/arm,twd-timer.yaml6
-rw-r--r--Documentation/devicetree/bindings/timer/brcm,bcm2835-system-timer.txt22
-rw-r--r--Documentation/devicetree/bindings/timer/brcm,bcm2835-system-timer.yaml50
-rw-r--r--Documentation/devicetree/bindings/timer/cdns,ttc.yaml22
-rw-r--r--Documentation/devicetree/bindings/timer/cirrus,clps711x-timer.txt29
-rw-r--r--Documentation/devicetree/bindings/timer/cirrus,clps711x-timer.yaml45
-rw-r--r--Documentation/devicetree/bindings/timer/cnxt,cx92755-timer.yaml49
-rw-r--r--Documentation/devicetree/bindings/timer/csky,gx6605s-timer.txt42
-rw-r--r--Documentation/devicetree/bindings/timer/csky,gx6605s-timer.yaml40
-rw-r--r--Documentation/devicetree/bindings/timer/csky,mptimer.txt42
-rw-r--r--Documentation/devicetree/bindings/timer/csky,mptimer.yaml46
-rw-r--r--Documentation/devicetree/bindings/timer/digicolor-timer.txt18
-rw-r--r--Documentation/devicetree/bindings/timer/econet,en751221-timer.yaml80
-rw-r--r--Documentation/devicetree/bindings/timer/ezchip,nps400-timer.yaml45
-rw-r--r--Documentation/devicetree/bindings/timer/ezchip,nps400-timer0.txt17
-rw-r--r--Documentation/devicetree/bindings/timer/ezchip,nps400-timer1.txt15
-rw-r--r--Documentation/devicetree/bindings/timer/faraday,fttmr010.txt38
-rw-r--r--Documentation/devicetree/bindings/timer/faraday,fttmr010.yaml89
-rw-r--r--Documentation/devicetree/bindings/timer/fsl,ftm-timer.txt31
-rw-r--r--Documentation/devicetree/bindings/timer/fsl,ftm-timer.yaml62
-rw-r--r--Documentation/devicetree/bindings/timer/fsl,gtm.txt30
-rw-r--r--Documentation/devicetree/bindings/timer/fsl,gtm.yaml83
-rw-r--r--Documentation/devicetree/bindings/timer/fsl,imxgpt.yaml3
-rw-r--r--Documentation/devicetree/bindings/timer/fsl,timrot.yaml48
-rw-r--r--Documentation/devicetree/bindings/timer/fsl,vf610-pit.yaml59
-rw-r--r--Documentation/devicetree/bindings/timer/img,pistachio-gptimer.txt28
-rw-r--r--Documentation/devicetree/bindings/timer/img,pistachio-gptimer.yaml69
-rw-r--r--Documentation/devicetree/bindings/timer/jcore,pit.txt24
-rw-r--r--Documentation/devicetree/bindings/timer/jcore,pit.yaml43
-rw-r--r--Documentation/devicetree/bindings/timer/lsi,zevio-timer.txt33
-rw-r--r--Documentation/devicetree/bindings/timer/lsi,zevio-timer.yaml56
-rw-r--r--Documentation/devicetree/bindings/timer/marvell,armada-370-timer.yaml88
-rw-r--r--Documentation/devicetree/bindings/timer/marvell,armada-370-xp-timer.txt44
-rw-r--r--Documentation/devicetree/bindings/timer/marvell,orion-timer.txt16
-rw-r--r--Documentation/devicetree/bindings/timer/marvell,orion-timer.yaml43
-rw-r--r--Documentation/devicetree/bindings/timer/mediatek,mtk-timer.txt48
-rw-r--r--Documentation/devicetree/bindings/timer/mediatek,timer.yaml87
-rw-r--r--Documentation/devicetree/bindings/timer/mrvl,mmp-timer.yaml2
-rw-r--r--Documentation/devicetree/bindings/timer/nvidia,tegra-timer.yaml1
-rw-r--r--Documentation/devicetree/bindings/timer/nvidia,tegra186-timer.yaml1
-rw-r--r--Documentation/devicetree/bindings/timer/nxp,lpc3220-timer.txt26
-rw-r--r--Documentation/devicetree/bindings/timer/nxp,lpc3220-timer.yaml55
-rw-r--r--Documentation/devicetree/bindings/timer/nxp,s32g2-stm.yaml64
-rw-r--r--Documentation/devicetree/bindings/timer/nxp,sysctr-timer.yaml9
-rw-r--r--Documentation/devicetree/bindings/timer/ralink,cevt-systick.yaml38
-rw-r--r--Documentation/devicetree/bindings/timer/realtek,otto-timer.yaml63
-rw-r--r--Documentation/devicetree/bindings/timer/realtek,rtd1625-systimer.yaml47
-rw-r--r--Documentation/devicetree/bindings/timer/renesas,cmt.yaml46
-rw-r--r--Documentation/devicetree/bindings/timer/renesas,em-sti.yaml10
-rw-r--r--Documentation/devicetree/bindings/timer/renesas,mtu2.yaml14
-rw-r--r--Documentation/devicetree/bindings/timer/renesas,ostm.yaml24
-rw-r--r--Documentation/devicetree/bindings/timer/renesas,rz-mtu3.yaml7
-rw-r--r--Documentation/devicetree/bindings/timer/renesas,tmu.yaml48
-rw-r--r--Documentation/devicetree/bindings/timer/renesas,tpu.yaml56
-rw-r--r--Documentation/devicetree/bindings/timer/rockchip,rk-timer.yaml1
-rw-r--r--Documentation/devicetree/bindings/timer/samsung,exynos4210-mct.yaml10
-rw-r--r--Documentation/devicetree/bindings/timer/sifive,clint.yaml28
-rw-r--r--Documentation/devicetree/bindings/timer/snps,arc-timer.txt27
-rw-r--r--Documentation/devicetree/bindings/timer/snps,arc-timer.yaml45
-rw-r--r--Documentation/devicetree/bindings/timer/snps,archs-gfrc.txt14
-rw-r--r--Documentation/devicetree/bindings/timer/snps,archs-gfrc.yaml30
-rw-r--r--Documentation/devicetree/bindings/timer/snps,archs-rtc.txt14
-rw-r--r--Documentation/devicetree/bindings/timer/snps,archs-rtc.yaml30
-rw-r--r--Documentation/devicetree/bindings/timer/socionext,milbeaut-timer.txt17
-rw-r--r--Documentation/devicetree/bindings/timer/socionext,milbeaut-timer.yaml40
-rw-r--r--Documentation/devicetree/bindings/timer/sprd,sc9860-timer.yaml68
-rw-r--r--Documentation/devicetree/bindings/timer/spreadtrum,sprd-timer.txt20
-rw-r--r--Documentation/devicetree/bindings/timer/st,spear-timer.txt16
-rw-r--r--Documentation/devicetree/bindings/timer/st,spear-timer.yaml36
-rw-r--r--Documentation/devicetree/bindings/timer/thead,c900-aclint-mtimer.yaml16
-rw-r--r--Documentation/devicetree/bindings/timer/ti,da830-timer.yaml68
-rw-r--r--Documentation/devicetree/bindings/timer/ti,davinci-timer.txt37
-rw-r--r--Documentation/devicetree/bindings/timer/ti,keystone-timer.txt29
-rw-r--r--Documentation/devicetree/bindings/timer/ti,keystone-timer.yaml63
-rw-r--r--Documentation/devicetree/bindings/timer/via,vt8500-timer.txt15
-rw-r--r--Documentation/devicetree/bindings/timer/via,vt8500-timer.yaml51
-rw-r--r--Documentation/devicetree/bindings/tpm/ibm,vtpm.yaml4
-rw-r--r--Documentation/devicetree/bindings/tpm/tcg,tpm-tis-i2c.yaml1
-rw-r--r--Documentation/devicetree/bindings/tpm/tcg,tpm_tis-spi.yaml1
-rw-r--r--Documentation/devicetree/bindings/tpm/tpm-common.yaml2
-rw-r--r--Documentation/devicetree/bindings/trigger-source/adi,util-sigma-delta-spi.yaml49
-rw-r--r--Documentation/devicetree/bindings/trigger-source/gpio-trigger.yaml40
-rw-r--r--Documentation/devicetree/bindings/trigger-source/pwm-trigger.yaml37
-rw-r--r--Documentation/devicetree/bindings/trivial-devices.yaml243
-rw-r--r--Documentation/devicetree/bindings/ufs/amd,versal2-ufs.yaml61
-rw-r--r--Documentation/devicetree/bindings/ufs/mediatek,ufs.yaml49
-rw-r--r--Documentation/devicetree/bindings/ufs/qcom,sc7180-ufshc.yaml167
-rw-r--r--Documentation/devicetree/bindings/ufs/qcom,sm8650-ufshc.yaml180
-rw-r--r--Documentation/devicetree/bindings/ufs/qcom,ufs-common.yaml67
-rw-r--r--Documentation/devicetree/bindings/ufs/qcom,ufs.yaml172
-rw-r--r--Documentation/devicetree/bindings/ufs/renesas,ufs.yaml28
-rw-r--r--Documentation/devicetree/bindings/ufs/rockchip,rk3576-ufshc.yaml105
-rw-r--r--Documentation/devicetree/bindings/ufs/samsung,exynos-ufs.yaml78
-rw-r--r--Documentation/devicetree/bindings/ufs/ufs-common.yaml16
-rw-r--r--Documentation/devicetree/bindings/usb/allwinner,sun4i-a10-musb.yaml2
-rw-r--r--Documentation/devicetree/bindings/usb/analogix,anx7411.yaml13
-rw-r--r--Documentation/devicetree/bindings/usb/apple,dwc3.yaml80
-rw-r--r--Documentation/devicetree/bindings/usb/aspeed,usb-vhub.yaml44
-rw-r--r--Documentation/devicetree/bindings/usb/brcm,bdc.yaml14
-rw-r--r--Documentation/devicetree/bindings/usb/cdns,usb3.yaml15
-rw-r--r--Documentation/devicetree/bindings/usb/chipidea,usb2-common.yaml203
-rw-r--r--Documentation/devicetree/bindings/usb/chipidea,usb2-imx.yaml309
-rw-r--r--Documentation/devicetree/bindings/usb/ci-hdrc-usb2.yaml365
-rw-r--r--Documentation/devicetree/bindings/usb/cypress,cypd4226.yaml5
-rw-r--r--Documentation/devicetree/bindings/usb/cypress,hx3.yaml46
-rw-r--r--Documentation/devicetree/bindings/usb/dwc2.yaml11
-rw-r--r--Documentation/devicetree/bindings/usb/dwc3-xilinx.yaml29
-rw-r--r--Documentation/devicetree/bindings/usb/eswin,eic7700-usb.yaml94
-rw-r--r--Documentation/devicetree/bindings/usb/fcs,fsa4480.yaml33
-rw-r--r--Documentation/devicetree/bindings/usb/fsl,imx8mp-dwc3.yaml16
-rw-r--r--Documentation/devicetree/bindings/usb/fsl,ls1028a.yaml55
-rw-r--r--Documentation/devicetree/bindings/usb/fsl,usb2.yaml95
-rw-r--r--Documentation/devicetree/bindings/usb/fsl,usbmisc.yaml27
-rw-r--r--Documentation/devicetree/bindings/usb/fsl-usb.txt81
-rw-r--r--Documentation/devicetree/bindings/usb/generic-ehci.yaml5
-rw-r--r--Documentation/devicetree/bindings/usb/generic-ohci.yaml2
-rw-r--r--Documentation/devicetree/bindings/usb/generic-xhci.yaml17
-rw-r--r--Documentation/devicetree/bindings/usb/genesys,gl850g.yaml75
-rw-r--r--Documentation/devicetree/bindings/usb/gpio-sbu-mux.yaml22
-rw-r--r--Documentation/devicetree/bindings/usb/hisilicon,hi3798mv200-dwc3.yaml99
-rw-r--r--Documentation/devicetree/bindings/usb/intel,ixp4xx-udc.yaml39
-rw-r--r--Documentation/devicetree/bindings/usb/intel,keembay-dwc3.yaml32
-rw-r--r--Documentation/devicetree/bindings/usb/isp1301.txt24
-rw-r--r--Documentation/devicetree/bindings/usb/ite,it5205.yaml72
-rw-r--r--Documentation/devicetree/bindings/usb/lpc32xx-udc.txt28
-rw-r--r--Documentation/devicetree/bindings/usb/maxim,max33359.yaml10
-rw-r--r--Documentation/devicetree/bindings/usb/maxim,max3420-udc.yaml28
-rw-r--r--Documentation/devicetree/bindings/usb/mediatek,mtk-xhci.yaml9
-rw-r--r--Documentation/devicetree/bindings/usb/mediatek,mtu3.yaml17
-rw-r--r--Documentation/devicetree/bindings/usb/microchip,mpfs-musb.yaml14
-rw-r--r--Documentation/devicetree/bindings/usb/microchip,usb2514.yaml90
-rw-r--r--Documentation/devicetree/bindings/usb/microchip,usb5744.yaml4
-rw-r--r--Documentation/devicetree/bindings/usb/msm-hsusb.txt110
-rw-r--r--Documentation/devicetree/bindings/usb/nvidia,tegra20-ehci.txt23
-rw-r--r--Documentation/devicetree/bindings/usb/nvidia,tegra210-xusb.yaml4
-rw-r--r--Documentation/devicetree/bindings/usb/nvidia,tegra234-xusb.yaml31
-rw-r--r--Documentation/devicetree/bindings/usb/nxp,lpc3220-udc.yaml50
-rw-r--r--Documentation/devicetree/bindings/usb/nxp,ptn36502.yaml13
-rw-r--r--Documentation/devicetree/bindings/usb/nxp,ptn5110.yaml6
-rw-r--r--Documentation/devicetree/bindings/usb/onnn,nb7vpq904m.yaml14
-rw-r--r--Documentation/devicetree/bindings/usb/parade,ps5511.yaml108
-rw-r--r--Documentation/devicetree/bindings/usb/parade,ps8830.yaml144
-rw-r--r--Documentation/devicetree/bindings/usb/qcom,dwc3.yaml122
-rw-r--r--Documentation/devicetree/bindings/usb/qcom,pmic-typec.yaml80
-rw-r--r--Documentation/devicetree/bindings/usb/qcom,snps-dwc3.yaml660
-rw-r--r--Documentation/devicetree/bindings/usb/qcom,wcd939x-usbss.yaml13
-rw-r--r--Documentation/devicetree/bindings/usb/realtek,rts5411.yaml48
-rw-r--r--Documentation/devicetree/bindings/usb/renesas,rzg3e-xhci.yaml95
-rw-r--r--Documentation/devicetree/bindings/usb/renesas,rzv2m-usb3drd.yaml36
-rw-r--r--Documentation/devicetree/bindings/usb/renesas,usb3-peri.yaml24
-rw-r--r--Documentation/devicetree/bindings/usb/renesas,usbhs.yaml46
-rw-r--r--Documentation/devicetree/bindings/usb/richtek,rt1711h.yaml3
-rw-r--r--Documentation/devicetree/bindings/usb/rockchip,dwc3.yaml27
-rw-r--r--Documentation/devicetree/bindings/usb/s3c2410-usb.txt22
-rw-r--r--Documentation/devicetree/bindings/usb/samsung,exynos-dwc3.yaml96
-rw-r--r--Documentation/devicetree/bindings/usb/smsc,usb3503.yaml90
-rw-r--r--Documentation/devicetree/bindings/usb/snps,dwc3-common.yaml432
-rw-r--r--Documentation/devicetree/bindings/usb/snps,dwc3.yaml390
-rw-r--r--Documentation/devicetree/bindings/usb/spacemit,k1-dwc3.yaml121
-rw-r--r--Documentation/devicetree/bindings/usb/ti,am62-usb.yaml8
-rw-r--r--Documentation/devicetree/bindings/usb/ti,hd3ss3220.yaml48
-rw-r--r--Documentation/devicetree/bindings/usb/ti,j721e-usb.yaml3
-rw-r--r--Documentation/devicetree/bindings/usb/ti,tusb1046.yaml50
-rw-r--r--Documentation/devicetree/bindings/usb/ti,tusb73x0-pci.yaml55
-rw-r--r--Documentation/devicetree/bindings/usb/ti,twl4030-usb.yaml74
-rw-r--r--Documentation/devicetree/bindings/usb/ti,twl6030-usb.yaml48
-rw-r--r--Documentation/devicetree/bindings/usb/ti,usb8020b.yaml69
-rw-r--r--Documentation/devicetree/bindings/usb/ti,usb8041.yaml20
-rw-r--r--Documentation/devicetree/bindings/usb/twlxxxx-usb.txt43
-rw-r--r--Documentation/devicetree/bindings/usb/usb-device.yaml9
-rw-r--r--Documentation/devicetree/bindings/usb/usb-hub.yaml84
-rw-r--r--Documentation/devicetree/bindings/usb/usb-nop-xceiv.yaml11
-rw-r--r--Documentation/devicetree/bindings/usb/usb-switch-ports.yaml68
-rw-r--r--Documentation/devicetree/bindings/usb/usb-switch.yaml28
-rw-r--r--Documentation/devicetree/bindings/usb/usb-uhci.txt18
-rw-r--r--Documentation/devicetree/bindings/usb/usb-uhci.yaml88
-rw-r--r--Documentation/devicetree/bindings/usb/usb.yaml2
-rw-r--r--Documentation/devicetree/bindings/usb/usb251xb.yaml9
-rw-r--r--Documentation/devicetree/bindings/usb/xlnx,usb2.yaml2
-rw-r--r--Documentation/devicetree/bindings/vendor-prefixes.yaml263
-rw-r--r--Documentation/devicetree/bindings/virtio/pci-iommu.yaml10
-rw-r--r--Documentation/devicetree/bindings/w1/fsl-imx-owire.yaml4
-rw-r--r--Documentation/devicetree/bindings/w1/maxim,ds2482.yaml2
-rw-r--r--Documentation/devicetree/bindings/w1/w1-uart.yaml59
-rw-r--r--Documentation/devicetree/bindings/watchdog/airoha,en7581-wdt.yaml51
-rw-r--r--Documentation/devicetree/bindings/watchdog/allwinner,sun4i-a10-wdt.yaml2
-rw-r--r--Documentation/devicetree/bindings/watchdog/amlogic,meson-gxbb-wdt.yaml1
-rw-r--r--Documentation/devicetree/bindings/watchdog/apple,wdt.yaml22
-rw-r--r--Documentation/devicetree/bindings/watchdog/arm,sp805.yaml5
-rw-r--r--Documentation/devicetree/bindings/watchdog/armada-37xx-wdt.txt23
-rw-r--r--Documentation/devicetree/bindings/watchdog/aspeed,ast2400-wdt.yaml146
-rw-r--r--Documentation/devicetree/bindings/watchdog/aspeed-wdt.txt73
-rw-r--r--Documentation/devicetree/bindings/watchdog/atmel,sama5d4-wdt.yaml12
-rw-r--r--Documentation/devicetree/bindings/watchdog/brcm,bcm2835-pm-wdog.txt18
-rw-r--r--Documentation/devicetree/bindings/watchdog/cirrus,ep9301-wdt.yaml42
-rw-r--r--Documentation/devicetree/bindings/watchdog/davinci-wdt.txt24
-rw-r--r--Documentation/devicetree/bindings/watchdog/dlg,da9062-watchdog.yaml2
-rw-r--r--Documentation/devicetree/bindings/watchdog/fsl,scu-wdt.yaml1
-rw-r--r--Documentation/devicetree/bindings/watchdog/fsl-imx-wdt.yaml22
-rw-r--r--Documentation/devicetree/bindings/watchdog/fsl-imx7ulp-wdt.yaml8
-rw-r--r--Documentation/devicetree/bindings/watchdog/img,pdc-wdt.yaml55
-rw-r--r--Documentation/devicetree/bindings/watchdog/imgpdc-wdt.txt19
-rw-r--r--Documentation/devicetree/bindings/watchdog/kontron,sl28cpld-wdt.yaml9
-rw-r--r--Documentation/devicetree/bindings/watchdog/lantiq,wdt.yaml57
-rw-r--r--Documentation/devicetree/bindings/watchdog/lantiq-wdt.txt24
-rw-r--r--Documentation/devicetree/bindings/watchdog/loongson,ls1x-wdt.yaml3
-rw-r--r--Documentation/devicetree/bindings/watchdog/lpc18xx-wdt.txt19
-rw-r--r--Documentation/devicetree/bindings/watchdog/marvel.txt45
-rw-r--r--Documentation/devicetree/bindings/watchdog/marvell,armada-3700-wdt.yaml41
-rw-r--r--Documentation/devicetree/bindings/watchdog/marvell,orion-wdt.yaml100
-rw-r--r--Documentation/devicetree/bindings/watchdog/mediatek,mtk-wdt.yaml3
-rw-r--r--Documentation/devicetree/bindings/watchdog/moxa,moxart-watchdog.txt15
-rw-r--r--Documentation/devicetree/bindings/watchdog/nuvoton,npcm-wdt.txt30
-rw-r--r--Documentation/devicetree/bindings/watchdog/nuvoton,npcm750-wdt.yaml60
-rw-r--r--Documentation/devicetree/bindings/watchdog/nxp,lpc1850-wwdt.yaml52
-rw-r--r--Documentation/devicetree/bindings/watchdog/nxp,pnx4008-wdt.yaml3
-rw-r--r--Documentation/devicetree/bindings/watchdog/nxp,s32g2-swt.yaml54
-rw-r--r--Documentation/devicetree/bindings/watchdog/omap-wdt.txt15
-rw-r--r--Documentation/devicetree/bindings/watchdog/qcom,pm8916-wdt.yaml2
-rw-r--r--Documentation/devicetree/bindings/watchdog/qcom-wdt.yaml7
-rw-r--r--Documentation/devicetree/bindings/watchdog/renesas,r9a09g057-wdt.yaml99
-rw-r--r--Documentation/devicetree/bindings/watchdog/renesas,rcar-gen3-wwdt.yaml114
-rw-r--r--Documentation/devicetree/bindings/watchdog/renesas,rza-wdt.yaml51
-rw-r--r--Documentation/devicetree/bindings/watchdog/renesas,rzg2l-wdt.yaml111
-rw-r--r--Documentation/devicetree/bindings/watchdog/renesas,rzn1-wdt.yaml50
-rw-r--r--Documentation/devicetree/bindings/watchdog/renesas,wdt.yaml93
-rw-r--r--Documentation/devicetree/bindings/watchdog/samsung-wdt.yaml14
-rw-r--r--Documentation/devicetree/bindings/watchdog/snps,dw-wdt.yaml4
-rw-r--r--Documentation/devicetree/bindings/watchdog/sprd,sp9860-wdt.yaml64
-rw-r--r--Documentation/devicetree/bindings/watchdog/sprd-wdt.txt19
-rw-r--r--Documentation/devicetree/bindings/watchdog/st,stm32-iwdg.yaml6
-rw-r--r--Documentation/devicetree/bindings/watchdog/starfive,jh7100-wdt.yaml40
-rw-r--r--Documentation/devicetree/bindings/watchdog/ti,davinci-wdt.yaml55
-rw-r--r--Documentation/devicetree/bindings/watchdog/ti,omap2-wdt.yaml51
-rw-r--r--Documentation/devicetree/bindings/watchdog/twl4030-wdt.txt10
-rw-r--r--Documentation/devicetree/bindings/watchdog/watchdog.yaml3
-rw-r--r--Documentation/devicetree/bindings/watchdog/zii,rave-sp-wdt.txt39
-rw-r--r--Documentation/devicetree/bindings/watchdog/zii,rave-sp-wdt.yaml47
-rw-r--r--Documentation/devicetree/bindings/watchdog/zii,rave-wdt.yaml49
-rw-r--r--Documentation/devicetree/bindings/watchdog/ziirave-wdt.txt19
-rw-r--r--Documentation/devicetree/bindings/writing-bindings.rst51
-rw-r--r--Documentation/devicetree/bindings/writing-schema.rst76
-rw-r--r--Documentation/devicetree/bindings/xilinx.txt26
-rw-r--r--Documentation/devicetree/of_unittest.rst14
-rw-r--r--Documentation/devicetree/overlay-notes.rst18
-rw-r--r--Documentation/devicetree/usage-model.rst6
-rw-r--r--Documentation/doc-guide/checktransupdate.rst54
-rw-r--r--Documentation/doc-guide/contributing.rst2
-rw-r--r--Documentation/doc-guide/index.rst1
-rw-r--r--Documentation/doc-guide/kernel-doc.rst83
-rw-r--r--Documentation/doc-guide/maintainer-profile.rst7
-rw-r--r--Documentation/doc-guide/parse-headers.rst189
-rw-r--r--Documentation/doc-guide/sphinx.rst59
-rw-r--r--Documentation/dontdiff270
-rw-r--r--Documentation/driver-api/auxiliary_bus.rst1
-rw-r--r--Documentation/driver-api/basics.rst3
-rw-r--r--Documentation/driver-api/coco/index.rst12
-rw-r--r--Documentation/driver-api/coco/measurement-registers.rst12
-rw-r--r--Documentation/driver-api/crypto/iaa/iaa-crypto.rst111
-rw-r--r--Documentation/driver-api/cxl/allocation/dax.rst60
-rw-r--r--Documentation/driver-api/cxl/allocation/hugepages.rst32
-rw-r--r--Documentation/driver-api/cxl/allocation/page-allocator.rst54
-rw-r--r--Documentation/driver-api/cxl/allocation/reclaim.rst51
-rw-r--r--Documentation/driver-api/cxl/conventions.rst182
-rw-r--r--Documentation/driver-api/cxl/devices/device-types.rst165
-rw-r--r--Documentation/driver-api/cxl/index.rst46
-rw-r--r--Documentation/driver-api/cxl/linux/access-coordinates.rst178
-rw-r--r--Documentation/driver-api/cxl/linux/cxl-driver.rst630
-rw-r--r--Documentation/driver-api/cxl/linux/dax-driver.rst43
-rw-r--r--Documentation/driver-api/cxl/linux/early-boot.rst137
-rw-r--r--Documentation/driver-api/cxl/linux/example-configurations/hb-interleave.rst314
-rw-r--r--Documentation/driver-api/cxl/linux/example-configurations/intra-hb-interleave.rst291
-rw-r--r--Documentation/driver-api/cxl/linux/example-configurations/multi-interleave.rst401
-rw-r--r--Documentation/driver-api/cxl/linux/example-configurations/single-device.rst246
-rw-r--r--Documentation/driver-api/cxl/linux/memory-hotplug.rst78
-rw-r--r--Documentation/driver-api/cxl/linux/overview.rst103
-rw-r--r--Documentation/driver-api/cxl/maturity-map.rst202
-rw-r--r--Documentation/driver-api/cxl/memory-devices.rst383
-rw-r--r--Documentation/driver-api/cxl/platform/acpi.rst76
-rw-r--r--Documentation/driver-api/cxl/platform/acpi/cedt.rst62
-rw-r--r--Documentation/driver-api/cxl/platform/acpi/dsdt.rst28
-rw-r--r--Documentation/driver-api/cxl/platform/acpi/hmat.rst32
-rw-r--r--Documentation/driver-api/cxl/platform/acpi/slit.rst21
-rw-r--r--Documentation/driver-api/cxl/platform/acpi/srat.rst71
-rw-r--r--Documentation/driver-api/cxl/platform/bios-and-efi.rst262
-rw-r--r--Documentation/driver-api/cxl/platform/cdat.rst118
-rw-r--r--Documentation/driver-api/cxl/platform/example-configs.rst13
-rw-r--r--Documentation/driver-api/cxl/platform/example-configurations/flexible.rst296
-rw-r--r--Documentation/driver-api/cxl/platform/example-configurations/hb-interleave.rst107
-rw-r--r--Documentation/driver-api/cxl/platform/example-configurations/multi-dev-per-hb.rst90
-rw-r--r--Documentation/driver-api/cxl/platform/example-configurations/one-dev-per-hb.rst136
-rw-r--r--Documentation/driver-api/cxl/theory-of-operation.rst415
-rw-r--r--Documentation/driver-api/device-io.rst7
-rw-r--r--Documentation/driver-api/dma-buf.rst2
-rw-r--r--Documentation/driver-api/dmaengine/client.rst9
-rw-r--r--Documentation/driver-api/dmaengine/provider.rst22
-rw-r--r--Documentation/driver-api/dpll.rst120
-rw-r--r--Documentation/driver-api/driver-model/devres.rst19
-rw-r--r--Documentation/driver-api/driver-model/overview.rst2
-rw-r--r--Documentation/driver-api/driver-model/platform.rst9
-rw-r--r--Documentation/driver-api/early-userspace/buffer-format.rst39
-rw-r--r--Documentation/driver-api/eisa.rst10
-rw-r--r--Documentation/driver-api/extcon.rst255
-rw-r--r--Documentation/driver-api/firewire.rst2
-rw-r--r--Documentation/driver-api/firmware/efi/index.rst11
-rw-r--r--Documentation/driver-api/firmware/firmware-usage-guidelines.rst5
-rw-r--r--Documentation/driver-api/fpga/fpga-bridge.rst7
-rw-r--r--Documentation/driver-api/fpga/fpga-mgr.rst34
-rw-r--r--Documentation/driver-api/fpga/fpga-region.rst13
-rw-r--r--Documentation/driver-api/generic-counter.rst4
-rw-r--r--Documentation/driver-api/generic_pt.rst137
-rw-r--r--Documentation/driver-api/gpio/board.rst71
-rw-r--r--Documentation/driver-api/gpio/consumer.rst14
-rw-r--r--Documentation/driver-api/gpio/driver.rst35
-rw-r--r--Documentation/driver-api/gpio/drivers-on-gpio.rst7
-rw-r--r--Documentation/driver-api/gpio/index.rst5
-rw-r--r--Documentation/driver-api/gpio/intro.rst12
-rw-r--r--Documentation/driver-api/gpio/legacy-boards.rst298
-rw-r--r--Documentation/driver-api/gpio/legacy.rst695
-rw-r--r--Documentation/driver-api/gpio/pca953x.rst552
-rw-r--r--Documentation/driver-api/hw-recoverable-errors.rst60
-rw-r--r--Documentation/driver-api/i3c/protocol.rst4
-rw-r--r--Documentation/driver-api/iio/buffers.rst8
-rw-r--r--Documentation/driver-api/iio/core.rst16
-rw-r--r--Documentation/driver-api/index.rst174
-rw-r--r--Documentation/driver-api/infiniband.rst16
-rw-r--r--Documentation/driver-api/infrastructure.rst6
-rw-r--r--Documentation/driver-api/input.rst7
-rw-r--r--Documentation/driver-api/ipmi.rst35
-rw-r--r--Documentation/driver-api/libata.rst25
-rw-r--r--Documentation/driver-api/media/camera-sensor.rst32
-rw-r--r--Documentation/driver-api/media/drivers/ccs/ccs.rst53
-rw-r--r--Documentation/driver-api/media/drivers/index.rst1
-rw-r--r--Documentation/driver-api/media/drivers/ipu6.rst190
-rw-r--r--Documentation/driver-api/media/drivers/zoran.rst2
-rw-r--r--Documentation/driver-api/media/maintainer-entry-profile.rst6
-rw-r--r--Documentation/driver-api/media/mc-core.rst67
-rw-r--r--Documentation/driver-api/media/tx-rx.rst48
-rw-r--r--Documentation/driver-api/media/v4l2-controls.rst9
-rw-r--r--Documentation/driver-api/media/v4l2-core.rst2
-rw-r--r--Documentation/driver-api/media/v4l2-fh.rst59
-rw-r--r--Documentation/driver-api/media/v4l2-isp.rst49
-rw-r--r--Documentation/driver-api/media/v4l2-jpeg.rst10
-rw-r--r--Documentation/driver-api/media/v4l2-subdev.rst2
-rw-r--r--Documentation/driver-api/mmc/index.rst1
-rw-r--r--Documentation/driver-api/mmc/mmc-test.rst299
-rw-r--r--Documentation/driver-api/mtd/nand_ecc.rst2
-rw-r--r--Documentation/driver-api/ntb.rst2
-rw-r--r--Documentation/driver-api/nvdimm/btt.rst2
-rw-r--r--Documentation/driver-api/nvdimm/nvdimm.rst8
-rw-r--r--Documentation/driver-api/nvmem.rst14
-rw-r--r--Documentation/driver-api/parport-lowlevel.rst5
-rw-r--r--Documentation/driver-api/pci/index.rst1
-rw-r--r--Documentation/driver-api/pci/p2pdma.rst97
-rw-r--r--Documentation/driver-api/pci/pci.rst12
-rw-r--r--Documentation/driver-api/pci/tsm.rst21
-rw-r--r--Documentation/driver-api/phy/phy.rst3
-rw-r--r--Documentation/driver-api/pin-control.rst73
-rw-r--r--Documentation/driver-api/pldmfw/index.rst1
-rw-r--r--Documentation/driver-api/pm/devices.rst6
-rw-r--r--Documentation/driver-api/pps.rst63
-rw-r--r--Documentation/driver-api/pwm.rst24
-rw-r--r--Documentation/driver-api/pwrseq.rst95
-rw-r--r--Documentation/driver-api/reset.rst1
-rw-r--r--Documentation/driver-api/scsi.rst26
-rw-r--r--Documentation/driver-api/serial/driver.rst11
-rw-r--r--Documentation/driver-api/soundwire/bra.rst336
-rw-r--r--Documentation/driver-api/soundwire/bra_cadence.rst66
-rw-r--r--Documentation/driver-api/soundwire/index.rst2
-rw-r--r--Documentation/driver-api/soundwire/stream.rst6
-rw-r--r--Documentation/driver-api/soundwire/summary.rst8
-rw-r--r--Documentation/driver-api/spi.rst2
-rw-r--r--Documentation/driver-api/thermal/exynos_thermal_emulation.rst14
-rw-r--r--Documentation/driver-api/thermal/intel_dptf.rst53
-rw-r--r--Documentation/driver-api/thermal/sysfs-api.rst158
-rw-r--r--Documentation/driver-api/tty/console.rst45
-rw-r--r--Documentation/driver-api/tty/index.rst1
-rw-r--r--Documentation/driver-api/tty/tty_driver.rst4
-rw-r--r--Documentation/driver-api/tty/tty_port.rst5
-rw-r--r--Documentation/driver-api/tty/tty_struct.rst2
-rw-r--r--Documentation/driver-api/usb/anchors.rst11
-rw-r--r--Documentation/driver-api/usb/callbacks.rst6
-rw-r--r--Documentation/driver-api/usb/hotplug.rst2
-rw-r--r--Documentation/driver-api/usb/index.rst1
-rw-r--r--Documentation/driver-api/usb/usb.rst7
-rw-r--r--Documentation/driver-api/usb/writing_musb_glue_layer.rst6
-rw-r--r--Documentation/driver-api/vfio.rst2
-rw-r--r--Documentation/driver-api/virtio/writing_virtio_drivers.rst1
-rw-r--r--Documentation/driver-api/wbrf.rst2
-rw-r--r--Documentation/driver-api/wmi.rst13
-rw-r--r--Documentation/edac/features.rst103
-rw-r--r--Documentation/edac/index.rst12
-rw-r--r--Documentation/edac/memory_repair.rst152
-rw-r--r--Documentation/edac/scrub.rst342
-rw-r--r--Documentation/fault-injection/fault-injection.rst64
-rw-r--r--Documentation/fault-injection/index.rst2
-rw-r--r--Documentation/fb/aty128fb.rst8
-rw-r--r--Documentation/fb/efifb.rst6
-rw-r--r--Documentation/fb/ep93xx-fb.rst4
-rw-r--r--Documentation/fb/fbcon.rst42
-rw-r--r--Documentation/fb/gxfb.rst8
-rw-r--r--Documentation/fb/index.rst80
-rw-r--r--Documentation/fb/lxfb.rst9
-rw-r--r--Documentation/fb/matroxfb.rst9
-rw-r--r--Documentation/fb/pvr2fb.rst6
-rw-r--r--Documentation/fb/sa1100fb.rst9
-rw-r--r--Documentation/fb/sisfb.rst6
-rw-r--r--Documentation/fb/sm712fb.rst6
-rw-r--r--Documentation/fb/sstfb.rst2
-rw-r--r--Documentation/fb/tgafb.rst6
-rw-r--r--Documentation/fb/udlfb.rst6
-rw-r--r--Documentation/fb/vesafb.rst6
-rw-r--r--Documentation/features/core/eBPF-JIT/arch-support.txt4
-rw-r--r--Documentation/features/core/generic-idle-thread/arch-support.txt2
-rw-r--r--Documentation/features/core/jump-labels/arch-support.txt2
-rw-r--r--Documentation/features/core/mseal_sys_mappings/arch-support.txt30
-rw-r--r--Documentation/features/core/thread-info-in-task/arch-support.txt2
-rw-r--r--Documentation/features/core/tracehook/arch-support.txt2
-rw-r--r--Documentation/features/debug/debug-vm-pgtable/arch-support.txt2
-rw-r--r--Documentation/features/debug/kprobes-on-ftrace/arch-support.txt2
-rwxr-xr-xDocumentation/features/list-arch.sh11
-rw-r--r--Documentation/features/locking/queued-spinlocks/arch-support.txt2
-rw-r--r--Documentation/features/perf/kprobes-event/arch-support.txt2
-rw-r--r--Documentation/features/sched/membarrier-sync-core/arch-support.txt18
-rwxr-xr-xDocumentation/features/scripts/features-refresh.sh98
-rw-r--r--Documentation/features/time/clockevents/arch-support.txt2
-rw-r--r--Documentation/features/vm/PG_uncached/arch-support.txt30
-rw-r--r--Documentation/filesystems/9p.rst66
-rw-r--r--Documentation/filesystems/api-summary.rst3
-rw-r--r--Documentation/filesystems/autofs.rst6
-rw-r--r--Documentation/filesystems/buffer.rst12
-rw-r--r--Documentation/filesystems/caching/cachefiles.rst2
-rw-r--r--Documentation/filesystems/caching/fscache.rst8
-rw-r--r--Documentation/filesystems/ceph.rst15
-rw-r--r--Documentation/filesystems/coda.rst2
-rw-r--r--Documentation/filesystems/dax.rst2
-rw-r--r--Documentation/filesystems/debugfs.rst31
-rw-r--r--Documentation/filesystems/directory-locking.rst4
-rw-r--r--Documentation/filesystems/dlmfs.rst2
-rw-r--r--Documentation/filesystems/efivarfs.rst2
-rw-r--r--Documentation/filesystems/erofs.rst5
-rw-r--r--Documentation/filesystems/ext4/atomic_writes.rst225
-rw-r--r--Documentation/filesystems/ext4/bitmaps.rst7
-rw-r--r--Documentation/filesystems/ext4/blockgroup.rst11
-rw-r--r--Documentation/filesystems/ext4/directory.rst63
-rw-r--r--Documentation/filesystems/ext4/dynamic.rst10
-rw-r--r--Documentation/filesystems/ext4/globals.rst15
-rw-r--r--Documentation/filesystems/ext4/index.rst2
-rw-r--r--Documentation/filesystems/ext4/inode_table.rst9
-rw-r--r--Documentation/filesystems/ext4/inodes.rst2
-rw-r--r--Documentation/filesystems/ext4/overview.rst21
-rw-r--r--Documentation/filesystems/ext4/super.rst24
-rw-r--r--Documentation/filesystems/f2fs.rst249
-rw-r--r--Documentation/filesystems/fiemap.rst49
-rw-r--r--Documentation/filesystems/files.rst2
-rw-r--r--Documentation/filesystems/fscrypt.rst271
-rw-r--r--Documentation/filesystems/fsverity.rst48
-rw-r--r--Documentation/filesystems/fuse/fuse-io-uring.rst99
-rw-r--r--Documentation/filesystems/fuse/fuse-io.rst (renamed from Documentation/filesystems/fuse-io.rst)2
-rw-r--r--Documentation/filesystems/fuse/fuse-passthrough.rst133
-rw-r--r--Documentation/filesystems/fuse/fuse.rst (renamed from Documentation/filesystems/fuse.rst)20
-rw-r--r--Documentation/filesystems/fuse/index.rst14
-rw-r--r--Documentation/filesystems/gfs2-glocks.rst252
-rw-r--r--Documentation/filesystems/gfs2.rst52
-rw-r--r--Documentation/filesystems/gfs2/glocks.rst249
-rw-r--r--Documentation/filesystems/gfs2/index.rst64
-rw-r--r--Documentation/filesystems/gfs2/uevents.rst (renamed from Documentation/filesystems/gfs2-uevents.rst)0
-rw-r--r--Documentation/filesystems/hpfs.rst2
-rw-r--r--Documentation/filesystems/idmappings.rst12
-rw-r--r--Documentation/filesystems/index.rst13
-rw-r--r--Documentation/filesystems/iomap/design.rst459
-rw-r--r--Documentation/filesystems/iomap/index.rst13
-rw-r--r--Documentation/filesystems/iomap/operations.rst785
-rw-r--r--Documentation/filesystems/iomap/porting.rst120
-rw-r--r--Documentation/filesystems/journalling.rst10
-rw-r--r--Documentation/filesystems/locking.rst81
-rw-r--r--Documentation/filesystems/mount_api.rst38
-rw-r--r--Documentation/filesystems/multigrain-ts.rst125
-rw-r--r--Documentation/filesystems/netfs_library.rst1016
-rw-r--r--Documentation/filesystems/nfs/exporting.rst7
-rw-r--r--Documentation/filesystems/nfs/index.rst2
-rw-r--r--Documentation/filesystems/nfs/localio.rst357
-rw-r--r--Documentation/filesystems/nfs/nfsd-io-modes.rst153
-rw-r--r--Documentation/filesystems/nfs/nfsd-maintainer-entry-profile.rst547
-rw-r--r--Documentation/filesystems/nfs/reexport.rst10
-rw-r--r--Documentation/filesystems/ntfs.rst466
-rw-r--r--Documentation/filesystems/ocfs2-online-filecheck.rst20
-rw-r--r--Documentation/filesystems/overlayfs.rst85
-rw-r--r--Documentation/filesystems/path-lookup.rst2
-rw-r--r--Documentation/filesystems/path-lookup.txt2
-rw-r--r--Documentation/filesystems/porting.rst214
-rw-r--r--Documentation/filesystems/proc.rst269
-rw-r--r--Documentation/filesystems/propagate_umount.txt484
-rw-r--r--Documentation/filesystems/ramfs-rootfs-initramfs.rst14
-rw-r--r--Documentation/filesystems/relay.rst36
-rw-r--r--Documentation/filesystems/resctrl.rst1932
-rw-r--r--Documentation/filesystems/sharedsubtree.rst1347
-rw-r--r--Documentation/filesystems/smb/index.rst1
-rw-r--r--Documentation/filesystems/smb/ksmbd.rst26
-rw-r--r--Documentation/filesystems/smb/smbdirect.rst103
-rw-r--r--Documentation/filesystems/squashfs.rst14
-rw-r--r--Documentation/filesystems/sysfs.rst27
-rw-r--r--Documentation/filesystems/sysv-fs.rst264
-rw-r--r--Documentation/filesystems/tmpfs.rst24
-rw-r--r--Documentation/filesystems/ubifs-authentication.rst2
-rw-r--r--Documentation/filesystems/vfs.rst193
-rw-r--r--Documentation/filesystems/xfs/xfs-delayed-logging-design.rst2
-rw-r--r--Documentation/filesystems/xfs/xfs-maintainer-entry-profile.rst2
-rw-r--r--Documentation/filesystems/xfs/xfs-online-fsck-design.rst888
-rw-r--r--Documentation/firmware-guide/acpi/apei/einj.rst67
-rw-r--r--Documentation/firmware-guide/acpi/dsd/data-node-references.rst26
-rw-r--r--Documentation/firmware-guide/acpi/dsd/graph.rst11
-rw-r--r--Documentation/firmware-guide/acpi/dsd/leds.rst7
-rw-r--r--Documentation/firmware-guide/acpi/enumeration.rst12
-rw-r--r--Documentation/firmware-guide/acpi/gpio-properties.rst34
-rw-r--r--Documentation/firmware-guide/acpi/i2c-muxes.rst12
-rw-r--r--Documentation/firmware-guide/acpi/index.rst1
-rw-r--r--Documentation/firmware-guide/acpi/method-customizing.rst89
-rw-r--r--Documentation/firmware-guide/acpi/namespace.rst4
-rw-r--r--Documentation/gpu/amdgpu/amd-hardware-list-info.rst23
-rw-r--r--Documentation/gpu/amdgpu/amdgpu-glossary.rst120
-rw-r--r--Documentation/gpu/amdgpu/apu-asic-info-table.csv31
-rw-r--r--Documentation/gpu/amdgpu/debugfs.rst210
-rw-r--r--Documentation/gpu/amdgpu/debugging.rst105
-rw-r--r--Documentation/gpu/amdgpu/dgpu-asic-info-table.csv56
-rw-r--r--Documentation/gpu/amdgpu/display/dc-arch-overview.svg731
-rw-r--r--Documentation/gpu/amdgpu/display/dc-components.svg732
-rw-r--r--Documentation/gpu/amdgpu/display/dc-debug.rst187
-rw-r--r--Documentation/gpu/amdgpu/display/dc-glossary.rst8
-rw-r--r--Documentation/gpu/amdgpu/display/dcn-blocks.rst57
-rw-r--r--Documentation/gpu/amdgpu/display/dcn-overview.rst2
-rw-r--r--Documentation/gpu/amdgpu/display/display-contributing.rst168
-rw-r--r--Documentation/gpu/amdgpu/display/display-manager.rst7
-rw-r--r--Documentation/gpu/amdgpu/display/index.rst79
-rw-r--r--Documentation/gpu/amdgpu/display/programming-model-dcn.rst162
-rw-r--r--Documentation/gpu/amdgpu/driver-core.rst85
-rw-r--r--Documentation/gpu/amdgpu/driver-misc.rst17
-rw-r--r--Documentation/gpu/amdgpu/gc/index.rst52
-rw-r--r--Documentation/gpu/amdgpu/gc/mes.rst38
-rw-r--r--Documentation/gpu/amdgpu/index.rst8
-rw-r--r--Documentation/gpu/amdgpu/pipe_and_queue_abstraction.svg1279
-rw-r--r--Documentation/gpu/amdgpu/process-isolation.rst59
-rw-r--r--Documentation/gpu/amdgpu/thermal.rst18
-rw-r--r--Documentation/gpu/amdgpu/userq.rst203
-rw-r--r--Documentation/gpu/automated_testing.rst18
-rw-r--r--Documentation/gpu/driver-uapi.rst10
-rw-r--r--Documentation/gpu/drivers.rst4
-rw-r--r--Documentation/gpu/drm-client.rst3
-rw-r--r--Documentation/gpu/drm-compute.rst54
-rw-r--r--Documentation/gpu/drm-internals.rst41
-rw-r--r--Documentation/gpu/drm-kms-helpers.rst37
-rw-r--r--Documentation/gpu/drm-kms.rst37
-rw-r--r--Documentation/gpu/drm-uapi.rst212
-rw-r--r--Documentation/gpu/drm-usage-stats.rst75
-rw-r--r--Documentation/gpu/i915.rst29
-rw-r--r--Documentation/gpu/index.rst1
-rw-r--r--Documentation/gpu/introduction.rst12
-rw-r--r--Documentation/gpu/kms-properties.csv2
-rw-r--r--Documentation/gpu/komeda-kms.rst2
-rw-r--r--Documentation/gpu/msm-preemption.rst99
-rw-r--r--Documentation/gpu/nouveau.rst32
-rw-r--r--Documentation/gpu/nova/core/devinit.rst61
-rw-r--r--Documentation/gpu/nova/core/falcon.rst158
-rw-r--r--Documentation/gpu/nova/core/fwsec.rst181
-rw-r--r--Documentation/gpu/nova/core/guidelines.rst24
-rw-r--r--Documentation/gpu/nova/core/todo.rst399
-rw-r--r--Documentation/gpu/nova/core/vbios.rst181
-rw-r--r--Documentation/gpu/nova/guidelines.rst69
-rw-r--r--Documentation/gpu/nova/index.rst34
-rw-r--r--Documentation/gpu/panfrost.rst9
-rw-r--r--Documentation/gpu/panthor.rst56
-rw-r--r--Documentation/gpu/rfc/color_pipeline.rst378
-rw-r--r--Documentation/gpu/rfc/gpusvm.rst118
-rw-r--r--Documentation/gpu/rfc/i915_scheduler.rst2
-rw-r--r--Documentation/gpu/rfc/i915_vm_bind.h11
-rw-r--r--Documentation/gpu/rfc/index.rst7
-rw-r--r--Documentation/gpu/rfc/xe.rst234
-rw-r--r--Documentation/gpu/todo.rst228
-rw-r--r--Documentation/gpu/vgaarbiter.rst6
-rw-r--r--Documentation/gpu/vkms.rst134
-rw-r--r--Documentation/gpu/xe/index.rst6
-rw-r--r--Documentation/gpu/xe/xe-drm-usage-stats.rst10
-rw-r--r--Documentation/gpu/xe/xe_configfs.rst16
-rw-r--r--Documentation/gpu/xe/xe_devcoredump.rst14
-rw-r--r--Documentation/gpu/xe/xe_device.rst10
-rw-r--r--Documentation/gpu/xe/xe_exec_queue.rst20
-rw-r--r--Documentation/gpu/xe/xe_firmware.rst6
-rw-r--r--Documentation/gpu/xe/xe_gt_freq.rst17
-rw-r--r--Documentation/gpu/xe/xe_mm.rst15
-rw-r--r--Documentation/gpu/xe/xe_pcode.rst9
-rw-r--r--Documentation/gpu/zynqmp.rst147
-rw-r--r--Documentation/hid/hid-alps.rst8
-rw-r--r--Documentation/hid/hid-bpf.rst173
-rw-r--r--Documentation/hid/index.rst1
-rw-r--r--Documentation/hid/intel-ish-hid.rst166
-rw-r--r--Documentation/hid/intel-thc-hid.rst596
-rw-r--r--Documentation/hwmon/abituguru-datasheet.rst8
-rw-r--r--Documentation/hwmon/abituguru.rst2
-rw-r--r--Documentation/hwmon/acpi_power_meter.rst29
-rw-r--r--Documentation/hwmon/adm1021.rst153
-rw-r--r--Documentation/hwmon/adm1275.rst44
-rw-r--r--Documentation/hwmon/adp1050.rst123
-rw-r--r--Documentation/hwmon/aht10.rst10
-rw-r--r--Documentation/hwmon/amc6821.rst7
-rw-r--r--Documentation/hwmon/aquacomputer_d5next.rst9
-rw-r--r--Documentation/hwmon/aspeed-g6-pwm-tach.rst26
-rw-r--r--Documentation/hwmon/asus_ec_sensors.rst23
-rw-r--r--Documentation/hwmon/asus_rog_ryujin.rst47
-rw-r--r--Documentation/hwmon/cgbc-hwmon.rst63
-rw-r--r--Documentation/hwmon/chipcap2.rst73
-rw-r--r--Documentation/hwmon/corsair-cpro.rst8
-rw-r--r--Documentation/hwmon/corsair-psu.rst6
-rw-r--r--Documentation/hwmon/cros_ec_hwmon.rst31
-rw-r--r--Documentation/hwmon/crps.rst97
-rw-r--r--Documentation/hwmon/dell-smm-hwmon.rst66
-rw-r--r--Documentation/hwmon/ds1621.rst10
-rw-r--r--Documentation/hwmon/emc1403.rst17
-rw-r--r--Documentation/hwmon/emc2305.rst1
-rw-r--r--Documentation/hwmon/f71882fg.rst9
-rw-r--r--Documentation/hwmon/g762.rst2
-rw-r--r--Documentation/hwmon/gpd-fan.rst78
-rw-r--r--Documentation/hwmon/htu31.rst37
-rw-r--r--Documentation/hwmon/hwmon-kernel-api.rst23
-rw-r--r--Documentation/hwmon/ina233.rst75
-rw-r--r--Documentation/hwmon/ina238.rst61
-rw-r--r--Documentation/hwmon/ina2xx.rst50
-rw-r--r--Documentation/hwmon/index.rst42
-rw-r--r--Documentation/hwmon/isl28022.rst64
-rw-r--r--Documentation/hwmon/isl68137.rst30
-rw-r--r--Documentation/hwmon/jc42.rst2
-rw-r--r--Documentation/hwmon/kbatt.rst60
-rw-r--r--Documentation/hwmon/kfan.rst39
-rw-r--r--Documentation/hwmon/lm70.rst2
-rw-r--r--Documentation/hwmon/lm75.rst23
-rw-r--r--Documentation/hwmon/lm90.rst166
-rw-r--r--Documentation/hwmon/lm92.rst26
-rw-r--r--Documentation/hwmon/lt3074.rst72
-rw-r--r--Documentation/hwmon/ltc2978.rst48
-rw-r--r--Documentation/hwmon/ltc4282.rst133
-rw-r--r--Documentation/hwmon/macsmc-hwmon.rst71
-rw-r--r--Documentation/hwmon/max127.rst2
-rw-r--r--Documentation/hwmon/max15301.rst10
-rw-r--r--Documentation/hwmon/max16064.rst2
-rw-r--r--Documentation/hwmon/max16065.rst8
-rw-r--r--Documentation/hwmon/max1619.rst8
-rw-r--r--Documentation/hwmon/max16601.rst2
-rw-r--r--Documentation/hwmon/max1668.rst2
-rw-r--r--Documentation/hwmon/max17616.rst62
-rw-r--r--Documentation/hwmon/max197.rst4
-rw-r--r--Documentation/hwmon/max20730.rst8
-rw-r--r--Documentation/hwmon/max31722.rst4
-rw-r--r--Documentation/hwmon/max31730.rst2
-rw-r--r--Documentation/hwmon/max31785.rst2
-rw-r--r--Documentation/hwmon/max31790.rst2
-rw-r--r--Documentation/hwmon/max31827.rst19
-rw-r--r--Documentation/hwmon/max34440.rst53
-rw-r--r--Documentation/hwmon/max6620.rst2
-rw-r--r--Documentation/hwmon/max6639.rst2
-rw-r--r--Documentation/hwmon/max6642.rst27
-rw-r--r--Documentation/hwmon/max6650.rst4
-rw-r--r--Documentation/hwmon/max6697.rst20
-rw-r--r--Documentation/hwmon/max77705.rst41
-rw-r--r--Documentation/hwmon/max8688.rst2
-rw-r--r--Documentation/hwmon/mc33xs2410_hwmon.rst34
-rw-r--r--Documentation/hwmon/mp2869.rst175
-rw-r--r--Documentation/hwmon/mp2891.rst179
-rw-r--r--Documentation/hwmon/mp2925.rst151
-rw-r--r--Documentation/hwmon/mp29502.rst93
-rw-r--r--Documentation/hwmon/mp2993.rst150
-rw-r--r--Documentation/hwmon/mp5920.rst91
-rw-r--r--Documentation/hwmon/mp5990.rst30
-rw-r--r--Documentation/hwmon/mp9941.rst92
-rw-r--r--Documentation/hwmon/mp9945.rst117
-rw-r--r--Documentation/hwmon/mpq8785.rst109
-rw-r--r--Documentation/hwmon/nct6683.rst26
-rw-r--r--Documentation/hwmon/nct7363.rst35
-rw-r--r--Documentation/hwmon/nzxt-kraken3.rst77
-rw-r--r--Documentation/hwmon/oxp-sensors.rst70
-rw-r--r--Documentation/hwmon/pmbus-core.rst50
-rw-r--r--Documentation/hwmon/pmbus.rst4
-rw-r--r--Documentation/hwmon/pt5161l.rst42
-rw-r--r--Documentation/hwmon/qnap-mcu-hwmon.rst27
-rw-r--r--Documentation/hwmon/sa67.rst41
-rw-r--r--Documentation/hwmon/sch5627.rst2
-rw-r--r--Documentation/hwmon/sg2042-mcu.rst78
-rw-r--r--Documentation/hwmon/sht21.rst26
-rw-r--r--Documentation/hwmon/sht3x.rst11
-rw-r--r--Documentation/hwmon/sht4x.rst14
-rw-r--r--Documentation/hwmon/spd5118.rst63
-rw-r--r--Documentation/hwmon/surface_fan.rst25
-rw-r--r--Documentation/hwmon/sy7636a-hwmon.rst4
-rw-r--r--Documentation/hwmon/tmp108.rst8
-rw-r--r--Documentation/hwmon/tps25990.rst147
-rw-r--r--Documentation/hwmon/tps53679.rst8
-rw-r--r--Documentation/hwmon/tsc1641.rst87
-rw-r--r--Documentation/hwmon/xdp710.rst83
-rw-r--r--Documentation/hwmon/zl6100.rst16
-rw-r--r--Documentation/i2c/busses/i2c-i801.rst4
-rw-r--r--Documentation/i2c/busses/i2c-parport.rst2
-rw-r--r--Documentation/i2c/busses/i2c-piix4.rst63
-rw-r--r--Documentation/i2c/i2c_bus.svg15
-rw-r--r--Documentation/i2c/slave-testunit-backend.rst237
-rw-r--r--Documentation/i2c/summary.rst79
-rw-r--r--Documentation/i2c/writing-clients.rst35
-rw-r--r--Documentation/iio/ad3552r.rst73
-rw-r--r--Documentation/iio/ad4000.rst220
-rw-r--r--Documentation/iio/ad4030.rst180
-rw-r--r--Documentation/iio/ad4695.rst269
-rw-r--r--Documentation/iio/ad7191.rst119
-rw-r--r--Documentation/iio/ad7380.rst212
-rw-r--r--Documentation/iio/ad7606.rst189
-rw-r--r--Documentation/iio/ad7625.rst91
-rw-r--r--Documentation/iio/ad7944.rst178
-rw-r--r--Documentation/iio/ade9000.rst268
-rw-r--r--Documentation/iio/adis16475.rst384
-rw-r--r--Documentation/iio/adis16480.rst446
-rw-r--r--Documentation/iio/adis16550.rst376
-rw-r--r--Documentation/iio/adxl313.rst293
-rw-r--r--Documentation/iio/adxl345.rst443
-rw-r--r--Documentation/iio/adxl380.rst233
-rw-r--r--Documentation/iio/bno055.rst14
-rw-r--r--Documentation/iio/iio_adc.rst305
-rw-r--r--Documentation/iio/iio_devbuf.rst152
-rw-r--r--Documentation/iio/iio_dmabuf_api.rst54
-rw-r--r--Documentation/iio/iio_tools.rst27
-rw-r--r--Documentation/iio/index.rst28
-rw-r--r--Documentation/iio/opt4060.rst61
-rw-r--r--Documentation/index.rst57
-rw-r--r--Documentation/infiniband/index.rst1
-rw-r--r--Documentation/infiniband/ucaps.rst71
-rw-r--r--Documentation/input/devices/amijoy.rst125
-rw-r--r--Documentation/input/devices/edt-ft5x06.rst21
-rw-r--r--Documentation/input/devices/elantech.rst2
-rw-r--r--Documentation/input/event-codes.rst25
-rw-r--r--Documentation/input/gamepad.rst19
-rw-r--r--Documentation/input/input-programming.rst19
-rw-r--r--Documentation/input/input.rst2
-rw-r--r--Documentation/kbuild/Kconfig.recursion-issue-016
-rw-r--r--Documentation/kbuild/bash-completion.rst65
-rw-r--r--Documentation/kbuild/gendwarfksyms.rst395
-rw-r--r--Documentation/kbuild/index.rst3
-rw-r--r--Documentation/kbuild/kbuild.rst37
-rw-r--r--Documentation/kbuild/kconfig-language.rst101
-rw-r--r--Documentation/kbuild/kconfig.rst363
-rw-r--r--Documentation/kbuild/llvm.rst7
-rw-r--r--Documentation/kbuild/makefiles.rst58
-rw-r--r--Documentation/kbuild/modules.rst265
-rw-r--r--Documentation/kbuild/reproducible-builds.rst3
-rw-r--r--Documentation/kernel-hacking/false-sharing.rst4
-rw-r--r--Documentation/kernel-hacking/hacking.rst2
-rw-r--r--Documentation/leds/index.rst1
-rw-r--r--Documentation/leds/leds-blinkm.rst31
-rw-r--r--Documentation/leds/leds-class-multicolor.rst82
-rw-r--r--Documentation/leds/leds-lp5521.rst2
-rw-r--r--Documentation/leds/leds-lp5523.rst2
-rw-r--r--Documentation/leds/leds-mlxcpld.rst2
-rw-r--r--Documentation/leds/leds-st1202.rst34
-rw-r--r--Documentation/leds/well-known-leds.txt8
-rw-r--r--Documentation/litmus-tests/README45
-rw-r--r--Documentation/litmus-tests/atomic/cmpxchg-fail-ordered-1.litmus35
-rw-r--r--Documentation/litmus-tests/atomic/cmpxchg-fail-ordered-2.litmus30
-rw-r--r--Documentation/litmus-tests/atomic/cmpxchg-fail-unordered-1.litmus34
-rw-r--r--Documentation/litmus-tests/atomic/cmpxchg-fail-unordered-2.litmus30
-rw-r--r--Documentation/livepatch/livepatch.rst2
-rw-r--r--Documentation/livepatch/module-elf-format.rst13
-rw-r--r--Documentation/locking/hwspinlock.rst68
-rw-r--r--Documentation/locking/locktypes.rst21
-rw-r--r--Documentation/locking/percpu-rw-semaphore.rst4
-rw-r--r--Documentation/locking/seqlock.rst13
-rw-r--r--Documentation/maintainer/configure-git.rst28
-rw-r--r--Documentation/maintainer/feature-and-driver-maintainers.rst11
-rw-r--r--Documentation/maintainer/maintainer-entry-profile.rst7
-rw-r--r--Documentation/maintainer/pull-requests.rst2
-rw-r--r--Documentation/memory-barriers.txt7
-rw-r--r--Documentation/misc-devices/amd-sbi.rst101
-rw-r--r--Documentation/misc-devices/index.rst2
-rw-r--r--Documentation/misc-devices/lis3lv02d.rst6
-rw-r--r--Documentation/misc-devices/mrvl_cn10k_dpi.rst52
-rw-r--r--Documentation/misc-devices/tps6594-pfsm.rst12
-rw-r--r--Documentation/misc-devices/uacce.rst7
-rw-r--r--Documentation/misc-devices/xilinx_sdfec.rst2
-rw-r--r--Documentation/mm/active_mm.rst2
-rw-r--r--Documentation/mm/allocation-profiling.rst104
-rw-r--r--Documentation/mm/arch_pgtable_helpers.rst30
-rw-r--r--Documentation/mm/balance.rst2
-rw-r--r--Documentation/mm/damon/design.rst499
-rw-r--r--Documentation/mm/damon/index.rst34
-rw-r--r--Documentation/mm/damon/maintainer-profile.rst97
-rw-r--r--Documentation/mm/damon/monitoring_intervals_tuning_example.rst247
-rw-r--r--Documentation/mm/hmm.rst12
-rw-r--r--Documentation/mm/index.rst22
-rw-r--r--Documentation/mm/memfd_preservation.rst23
-rw-r--r--Documentation/mm/memory-model.rst2
-rw-r--r--Documentation/mm/page_cache.rst10
-rw-r--r--Documentation/mm/page_frags.rst2
-rw-r--r--Documentation/mm/page_migration.rst57
-rw-r--r--Documentation/mm/page_owner.rst78
-rw-r--r--Documentation/mm/page_table_check.rst9
-rw-r--r--Documentation/mm/page_tables.rst2
-rw-r--r--Documentation/mm/physical_memory.rst270
-rw-r--r--Documentation/mm/process_addrs.rst911
-rw-r--r--Documentation/mm/slab.rst7
-rw-r--r--Documentation/mm/slub.rst461
-rw-r--r--Documentation/mm/split_page_table_lock.rst12
-rw-r--r--Documentation/mm/swap-table.rst69
-rw-r--r--Documentation/mm/transhuge.rst45
-rw-r--r--Documentation/mm/unevictable-lru.rst18
-rw-r--r--Documentation/mm/vmalloced-kernel-stacks.rst18
-rw-r--r--Documentation/mm/vmemmap_dedup.rst22
-rw-r--r--Documentation/mm/z3fold.rst28
-rw-r--r--Documentation/mm/zsmalloc.rst5
-rw-r--r--Documentation/netlink/genetlink-c.yaml58
-rw-r--r--Documentation/netlink/genetlink-legacy.yaml76
-rw-r--r--Documentation/netlink/genetlink.yaml49
-rw-r--r--Documentation/netlink/netlink-raw.yaml64
-rw-r--r--Documentation/netlink/specs/binder.yaml93
-rw-r--r--Documentation/netlink/specs/conntrack.yaml642
-rw-r--r--Documentation/netlink/specs/devlink.yaml285
-rw-r--r--Documentation/netlink/specs/dpll.yaml184
-rw-r--r--Documentation/netlink/specs/em.yaml113
-rw-r--r--Documentation/netlink/specs/ethtool.yaml1201
-rw-r--r--Documentation/netlink/specs/fou.yaml54
-rw-r--r--Documentation/netlink/specs/handshake.yaml14
-rw-r--r--Documentation/netlink/specs/index.rst13
-rw-r--r--Documentation/netlink/specs/lockd.yaml45
-rw-r--r--Documentation/netlink/specs/mptcp_pm.yaml210
-rw-r--r--Documentation/netlink/specs/net_shaper.yaml363
-rw-r--r--Documentation/netlink/specs/netdev.yaml434
-rw-r--r--Documentation/netlink/specs/nfsd.yaml145
-rw-r--r--Documentation/netlink/specs/nftables.yaml1532
-rw-r--r--Documentation/netlink/specs/nl80211.yaml1933
-rw-r--r--Documentation/netlink/specs/nlctrl.yaml208
-rw-r--r--Documentation/netlink/specs/ovpn.yaml508
-rw-r--r--Documentation/netlink/specs/ovs_datapath.yaml14
-rw-r--r--Documentation/netlink/specs/ovs_flow.yaml41
-rw-r--r--Documentation/netlink/specs/ovs_vport.yaml15
-rw-r--r--Documentation/netlink/specs/psp.yaml282
-rw-r--r--Documentation/netlink/specs/rt-addr.yaml195
-rw-r--r--Documentation/netlink/specs/rt-link.yaml2555
-rw-r--r--Documentation/netlink/specs/rt-neigh.yaml453
-rw-r--r--Documentation/netlink/specs/rt-route.yaml324
-rw-r--r--Documentation/netlink/specs/rt-rule.yaml275
-rw-r--r--Documentation/netlink/specs/rt_addr.yaml179
-rw-r--r--Documentation/netlink/specs/rt_link.yaml1859
-rw-r--r--Documentation/netlink/specs/rt_route.yaml327
-rw-r--r--Documentation/netlink/specs/tc.yaml2539
-rw-r--r--Documentation/netlink/specs/tcp_metrics.yaml169
-rw-r--r--Documentation/netlink/specs/team.yaml206
-rw-r--r--Documentation/netlink/specs/wireguard.yaml298
-rw-r--r--Documentation/networking/6pack.rst2
-rw-r--r--Documentation/networking/af_xdp.rst48
-rw-r--r--Documentation/networking/arcnet-hardware.rst24
-rw-r--r--Documentation/networking/arcnet.rst48
-rw-r--r--Documentation/networking/ax25.rst7
-rw-r--r--Documentation/networking/bareudp.rst11
-rw-r--r--Documentation/networking/batman-adv.rst4
-rw-r--r--Documentation/networking/bonding.rst146
-rw-r--r--Documentation/networking/bridge.rst2
-rw-r--r--Documentation/networking/can.rst124
-rw-r--r--Documentation/networking/cdc_mbim.rst2
-rw-r--r--Documentation/networking/dccp.rst219
-rw-r--r--Documentation/networking/device_drivers/cable/index.rst18
-rw-r--r--Documentation/networking/device_drivers/cable/sb1000.rst222
-rw-r--r--Documentation/networking/device_drivers/cellular/qualcomm/rmnet.rst22
-rw-r--r--Documentation/networking/device_drivers/ethernet/amazon/ena.rst119
-rw-r--r--Documentation/networking/device_drivers/ethernet/freescale/dpaa2/overview.rst2
-rw-r--r--Documentation/networking/device_drivers/ethernet/freescale/dpaa2/switch-driver.rst2
-rw-r--r--Documentation/networking/device_drivers/ethernet/huawei/hinic3.rst137
-rw-r--r--Documentation/networking/device_drivers/ethernet/index.rst10
-rw-r--r--Documentation/networking/device_drivers/ethernet/intel/i40e.rst12
-rw-r--r--Documentation/networking/device_drivers/ethernet/intel/ice.rst65
-rw-r--r--Documentation/networking/device_drivers/ethernet/marvell/octeon_ep_vf.rst24
-rw-r--r--Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst93
-rw-r--r--Documentation/networking/device_drivers/ethernet/mellanox/mlx5/counters.rst93
-rw-r--r--Documentation/networking/device_drivers/ethernet/mellanox/mlx5/kconfig.rst3
-rw-r--r--Documentation/networking/device_drivers/ethernet/meta/fbnic.rst192
-rw-r--r--Documentation/networking/device_drivers/ethernet/mucse/rnpgbe.rst17
-rw-r--r--Documentation/networking/device_drivers/ethernet/pensando/ionic.rst32
-rw-r--r--Documentation/networking/device_drivers/ethernet/pensando/ionic_rdma.rst52
-rw-r--r--Documentation/networking/device_drivers/ethernet/qualcomm/ppe/ppe.rst194
-rw-r--r--Documentation/networking/device_drivers/ethernet/ti/am65_nuss_cpsw_switchdev.rst2
-rw-r--r--Documentation/networking/device_drivers/ethernet/ti/cpsw.rst6
-rw-r--r--Documentation/networking/device_drivers/ethernet/ti/cpsw_switchdev.rst2
-rw-r--r--Documentation/networking/device_drivers/ethernet/ti/icssg_prueth.rst56
-rw-r--r--Documentation/networking/device_drivers/ethernet/toshiba/spider_net.rst202
-rw-r--r--Documentation/networking/device_drivers/ethernet/wangxun/ngbevf.rst16
-rw-r--r--Documentation/networking/device_drivers/ethernet/wangxun/txgbevf.rst16
-rw-r--r--Documentation/networking/device_drivers/index.rst1
-rw-r--r--Documentation/networking/device_drivers/wwan/t7xx.rst106
-rw-r--r--Documentation/networking/devlink/bnxt.rst2
-rw-r--r--Documentation/networking/devlink/devlink-eswitch-attr.rst89
-rw-r--r--Documentation/networking/devlink/devlink-health.rst2
-rw-r--r--Documentation/networking/devlink/devlink-info.rst9
-rw-r--r--Documentation/networking/devlink/devlink-params.rst28
-rw-r--r--Documentation/networking/devlink/devlink-port.rst43
-rw-r--r--Documentation/networking/devlink/devlink-region.rst2
-rw-r--r--Documentation/networking/devlink/devlink-trap.rst2
-rw-r--r--Documentation/networking/devlink/hns3.rst5
-rw-r--r--Documentation/networking/devlink/i40e.rst34
-rw-r--r--Documentation/networking/devlink/ice.rst83
-rw-r--r--Documentation/networking/devlink/index.rst24
-rw-r--r--Documentation/networking/devlink/ixgbe.rst171
-rw-r--r--Documentation/networking/devlink/kvaser_pciefd.rst24
-rw-r--r--Documentation/networking/devlink/kvaser_usb.rst33
-rw-r--r--Documentation/networking/devlink/mlx5.rst143
-rw-r--r--Documentation/networking/devlink/netdevsim.rst2
-rw-r--r--Documentation/networking/devlink/nfp.rst5
-rw-r--r--Documentation/networking/devlink/octeontx2.rst37
-rw-r--r--Documentation/networking/devlink/sfc.rst16
-rw-r--r--Documentation/networking/devlink/stmmac.rst40
-rw-r--r--Documentation/networking/devlink/zl3073x.rst65
-rw-r--r--Documentation/networking/devmem.rst419
-rw-r--r--Documentation/networking/diagnostic/index.rst17
-rw-r--r--Documentation/networking/diagnostic/twisted_pair_layer1_diagnostics.rst784
-rw-r--r--Documentation/networking/dns_resolver.rst56
-rw-r--r--Documentation/networking/dsa/dsa.rst17
-rw-r--r--Documentation/networking/ethtool-netlink.rst588
-rw-r--r--Documentation/networking/filter.rst4
-rw-r--r--Documentation/networking/ieee802154.rst16
-rw-r--r--Documentation/networking/index.rst20
-rw-r--r--Documentation/networking/iou-zcrx.rst202
-rw-r--r--Documentation/networking/ip-sysctl.rst993
-rw-r--r--Documentation/networking/iso15765-2.rst386
-rw-r--r--Documentation/networking/j1939.rst677
-rw-r--r--Documentation/networking/kapi.rst3
-rw-r--r--Documentation/networking/kcm.rst2
-rw-r--r--Documentation/networking/l2tp.rst189
-rw-r--r--Documentation/networking/mptcp-sysctl.rst116
-rw-r--r--Documentation/networking/mptcp.rst156
-rw-r--r--Documentation/networking/multi-pf-netdev.rst174
-rw-r--r--Documentation/networking/napi.rst272
-rw-r--r--Documentation/networking/net_cachelines/inet_connection_sock.rst85
-rw-r--r--Documentation/networking/net_cachelines/inet_sock.rst83
-rw-r--r--Documentation/networking/net_cachelines/net_device.rst357
-rw-r--r--Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst305
-rw-r--r--Documentation/networking/net_cachelines/snmp.rst260
-rw-r--r--Documentation/networking/net_cachelines/tcp_sock.rst265
-rw-r--r--Documentation/networking/net_dim.rst44
-rw-r--r--Documentation/networking/net_failover.rst6
-rw-r--r--Documentation/networking/netconsole.rst210
-rw-r--r--Documentation/networking/netdev-features.rst20
-rw-r--r--Documentation/networking/netdevices.rst153
-rw-r--r--Documentation/networking/netlink_spec/.gitignore1
-rw-r--r--Documentation/networking/netlink_spec/readme.txt4
-rw-r--r--Documentation/networking/netmem.rst98
-rw-r--r--Documentation/networking/nf_conntrack-sysctl.rst5
-rw-r--r--Documentation/networking/nfc.rst6
-rw-r--r--Documentation/networking/oa-tc6-framework.rst497
-rw-r--r--Documentation/networking/packet_mmap.rst5
-rw-r--r--Documentation/networking/phy-link-topology.rst121
-rw-r--r--Documentation/networking/phy.rst15
-rw-r--r--Documentation/networking/pse-pd/index.rst10
-rw-r--r--Documentation/networking/pse-pd/introduction.rst73
-rw-r--r--Documentation/networking/pse-pd/pse-pi.rst301
-rw-r--r--Documentation/networking/psp.rst183
-rw-r--r--Documentation/networking/rds.rst10
-rw-r--r--Documentation/networking/representors.rst1
-rw-r--r--Documentation/networking/rxrpc.rst48
-rw-r--r--Documentation/networking/scaling.rst21
-rw-r--r--Documentation/networking/seg6-sysctl.rst3
-rw-r--r--Documentation/networking/segmentation-offloads.rst22
-rw-r--r--Documentation/networking/sfp-phylink.rst147
-rw-r--r--Documentation/networking/smc-sysctl.rst40
-rw-r--r--Documentation/networking/sriov.rst25
-rw-r--r--Documentation/networking/statistics.rst21
-rw-r--r--Documentation/networking/strparser.rst11
-rw-r--r--Documentation/networking/switchdev.rst4
-rw-r--r--Documentation/networking/tcp_ao.rst29
-rw-r--r--Documentation/networking/timestamping.rst88
-rw-r--r--Documentation/networking/tipc.rst2
-rw-r--r--Documentation/networking/tls-offload.rst29
-rw-r--r--Documentation/networking/tls.rst60
-rw-r--r--Documentation/networking/tproxy.rst6
-rw-r--r--Documentation/networking/xdp-rx-metadata.rst33
-rw-r--r--Documentation/networking/xfrm/index.rst13
-rw-r--r--Documentation/networking/xfrm/xfrm_device.rst (renamed from Documentation/networking/xfrm_device.rst)40
-rw-r--r--Documentation/networking/xfrm/xfrm_proc.rst119
-rw-r--r--Documentation/networking/xfrm/xfrm_sync.rst192
-rw-r--r--Documentation/networking/xfrm/xfrm_sysctl.rst11
-rw-r--r--Documentation/networking/xfrm_proc.rst113
-rw-r--r--Documentation/networking/xfrm_sync.rst189
-rw-r--r--Documentation/networking/xfrm_sysctl.rst11
-rw-r--r--Documentation/networking/xsk-tx-metadata.rst78
-rw-r--r--Documentation/nvme/feature-and-quirk-policy.rst6
-rw-r--r--Documentation/nvme/index.rst12
-rw-r--r--Documentation/nvme/nvme-pci-endpoint-target.rst368
-rw-r--r--Documentation/power/energy-model.rst183
-rw-r--r--Documentation/power/index.rst1
-rw-r--r--Documentation/power/opp.rst2
-rw-r--r--Documentation/power/pci.rst19
-rw-r--r--Documentation/power/pm_qos_interface.rst16
-rw-r--r--Documentation/power/power_supply_class.rst84
-rw-r--r--Documentation/power/regulator/consumer.rst36
-rw-r--r--Documentation/power/runtime_pm.rst73
-rw-r--r--Documentation/power/shutdown-debugging.rst53
-rw-r--r--Documentation/power/suspend-and-cpuhotplug.rst2
-rw-r--r--Documentation/power/suspend-and-interrupts.rst2
-rw-r--r--Documentation/power/video.rst2
-rw-r--r--Documentation/process/1.Intro.rst12
-rw-r--r--Documentation/process/2.Process.rst55
-rw-r--r--Documentation/process/4.Coding.rst2
-rw-r--r--Documentation/process/5.Posting.rst29
-rw-r--r--Documentation/process/adding-syscalls.rst84
-rw-r--r--Documentation/process/backporting.rst14
-rw-r--r--Documentation/process/changes.rst75
-rw-r--r--Documentation/process/code-of-conduct-interpretation.rst98
-rw-r--r--Documentation/process/coding-style.rst59
-rw-r--r--Documentation/process/cve.rst121
-rw-r--r--Documentation/process/debugging/driver_development_debugging_guide.rst235
-rw-r--r--Documentation/process/debugging/gdb-kernel-debugging.rst (renamed from Documentation/dev-tools/gdb-kernel-debugging.rst)34
-rw-r--r--Documentation/process/debugging/index.rst80
-rw-r--r--Documentation/process/debugging/kgdb.rst (renamed from Documentation/dev-tools/kgdb.rst)70
-rw-r--r--Documentation/process/debugging/media_specific_debugging_guide.rst180
-rw-r--r--Documentation/process/debugging/userspace_debugging_guide.rst280
-rw-r--r--Documentation/process/email-clients.rst35
-rw-r--r--Documentation/process/embargoed-hardware-issues.rst159
-rw-r--r--Documentation/process/handling-regressions.rst44
-rw-r--r--Documentation/process/howto.rst14
-rw-r--r--Documentation/process/index.rst20
-rw-r--r--Documentation/process/kernel-docs.rst225
-rw-r--r--Documentation/process/license-rules.rst18
-rw-r--r--Documentation/process/magic-number.rst84
-rw-r--r--Documentation/process/maintainer-netdev.rst111
-rw-r--r--Documentation/process/maintainer-pgp-guide.rst158
-rw-r--r--Documentation/process/maintainer-soc-clean-dts.rst5
-rw-r--r--Documentation/process/maintainer-soc.rst48
-rw-r--r--Documentation/process/maintainer-tip.rst90
-rw-r--r--Documentation/process/researcher-guidelines.rst2
-rw-r--r--Documentation/process/security-bugs.rst30
-rw-r--r--Documentation/process/stable-kernel-rules.rst236
-rw-r--r--Documentation/process/submit-checklist.rst173
-rw-r--r--Documentation/process/submitting-patches.rst167
-rw-r--r--Documentation/rust/arch-support.rst7
-rw-r--r--Documentation/rust/coding-guidelines.rst296
-rw-r--r--Documentation/rust/general-information.rst106
-rw-r--r--Documentation/rust/index.rst22
-rw-r--r--Documentation/rust/quick-start.rst206
-rw-r--r--Documentation/rust/testing.rst236
-rw-r--r--Documentation/scheduler/completion.rst2
-rw-r--r--Documentation/scheduler/index.rst3
-rw-r--r--Documentation/scheduler/membarrier.rst39
-rw-r--r--Documentation/scheduler/sched-bwc.rst2
-rw-r--r--Documentation/scheduler/sched-deadline.rst114
-rw-r--r--Documentation/scheduler/sched-debug.rst2
-rw-r--r--Documentation/scheduler/sched-design-CFS.rst17
-rw-r--r--Documentation/scheduler/sched-domains.rst17
-rw-r--r--Documentation/scheduler/sched-eevdf.rst43
-rw-r--r--Documentation/scheduler/sched-ext.rst367
-rw-r--r--Documentation/scheduler/sched-rt-group.rst11
-rw-r--r--Documentation/scheduler/sched-stats.rst140
-rw-r--r--Documentation/scsi/scsi_eh.rst46
-rw-r--r--Documentation/scsi/scsi_fc_transport.rst35
-rw-r--r--Documentation/scsi/scsi_mid_low_api.rst252
-rw-r--r--Documentation/scsi/st.rst5
-rw-r--r--Documentation/security/SCTP.rst2
-rw-r--r--Documentation/security/credentials.rst7
-rw-r--r--Documentation/security/index.rst1
-rw-r--r--Documentation/security/ipe.rst446
-rw-r--r--Documentation/security/keys/trusted-encrypted.rst141
-rw-r--r--Documentation/security/landlock.rst36
-rw-r--r--Documentation/security/self-protection.rst2
-rw-r--r--Documentation/security/snp-tdx-threat-model.rst2
-rw-r--r--Documentation/security/tpm/index.rst3
-rw-r--r--Documentation/security/tpm/tpm-security.rst216
-rw-r--r--Documentation/security/tpm/tpm_ffa_crb.rst65
-rw-r--r--Documentation/security/tpm/tpm_tis.rst46
-rw-r--r--Documentation/sound/alsa-configuration.rst121
-rw-r--r--Documentation/sound/cards/emu-mixer.rst2
-rw-r--r--Documentation/sound/codecs/cs35l56.rst305
-rw-r--r--Documentation/sound/codecs/index.rst9
-rw-r--r--Documentation/sound/designs/compress-accel.rst134
-rw-r--r--Documentation/sound/designs/index.rst1
-rw-r--r--Documentation/sound/designs/midi-2.0.rst18
-rw-r--r--Documentation/sound/designs/powersave.rst6
-rw-r--r--Documentation/sound/hd-audio/notes.rst24
-rw-r--r--Documentation/sound/index.rst2
-rw-r--r--Documentation/sound/kernel-api/writing-an-alsa-driver.rst35
-rw-r--r--Documentation/sound/soc/clocking.rst12
-rw-r--r--Documentation/sound/soc/codec-to-codec.rst4
-rw-r--r--Documentation/sound/soc/codec.rst4
-rw-r--r--Documentation/sound/soc/dapm-graph.svg375
-rw-r--r--Documentation/sound/soc/dapm.rst170
-rw-r--r--Documentation/sound/soc/dpcm.rst32
-rw-r--r--Documentation/sound/soc/index.rst1
-rw-r--r--Documentation/sound/soc/machine.rst26
-rw-r--r--Documentation/sound/soc/platform.rst4
-rw-r--r--Documentation/sound/soc/usb.rst482
-rw-r--r--Documentation/sound/utimers.rst126
-rw-r--r--Documentation/sphinx-static/custom.css15
-rw-r--r--Documentation/sphinx/automarkup.py162
-rw-r--r--Documentation/sphinx/cdomain.py249
-rw-r--r--Documentation/sphinx/kernel_abi.py164
-rw-r--r--Documentation/sphinx/kernel_feat.py36
-rwxr-xr-xDocumentation/sphinx/kernel_include.py614
-rw-r--r--Documentation/sphinx/kerneldoc-preamble.sty20
-rw-r--r--Documentation/sphinx/kerneldoc.py229
-rw-r--r--Documentation/sphinx/kernellog.py22
-rw-r--r--Documentation/sphinx/kfigure.py92
-rw-r--r--Documentation/sphinx/load_config.py59
-rwxr-xr-xDocumentation/sphinx/maintainers_include.py8
-rw-r--r--Documentation/sphinx/min_requirements.txt11
-rw-r--r--Documentation/sphinx/parallel-wrapper.sh33
-rwxr-xr-xDocumentation/sphinx/parse-headers.pl401
-rwxr-xr-xDocumentation/sphinx/parser_yaml.py123
-rw-r--r--Documentation/sphinx/requirements.txt8
-rwxr-xr-xDocumentation/sphinx/rstFlatTable.py11
-rw-r--r--Documentation/sphinx/templates/kernel-toc.html3
-rw-r--r--Documentation/sphinx/templates/translations.html4
-rw-r--r--Documentation/sphinx/translations.py10
-rw-r--r--Documentation/spi/index.rst1
-rw-r--r--Documentation/spi/pxa2xx.rst211
-rw-r--r--Documentation/spi/spi-lm70llp.rst4
-rw-r--r--Documentation/spi/spi-summary.rst202
-rw-r--r--Documentation/spi/spidev.rst2
-rw-r--r--Documentation/staging/crc32.rst4
-rw-r--r--Documentation/staging/index.rst1
-rw-r--r--Documentation/staging/magic-number.rst84
-rw-r--r--Documentation/staging/remoteproc.rst2
-rw-r--r--Documentation/staging/rpmsg.rst48
-rw-r--r--Documentation/staging/speculation.rst1
-rw-r--r--Documentation/staging/xz.rst157
-rw-r--r--Documentation/subsystem-apis.rst4
-rw-r--r--Documentation/sunrpc/xdr/nfs4_1.x186
-rw-r--r--Documentation/tee/index.rst2
-rw-r--r--Documentation/tee/qtee.rst96
-rw-r--r--Documentation/tee/ts-tee.rst71
-rw-r--r--Documentation/timers/delay_sleep_functions.rst121
-rw-r--r--Documentation/timers/index.rst2
-rw-r--r--Documentation/timers/no_hz.rst7
-rw-r--r--Documentation/timers/timers-howto.rst115
-rw-r--r--Documentation/tools/rtla/common_appendix.rst13
-rw-r--r--Documentation/tools/rtla/common_appendix.txt24
-rw-r--r--Documentation/tools/rtla/common_hist_options.txt (renamed from Documentation/tools/rtla/common_hist_options.rst)0
-rw-r--r--Documentation/tools/rtla/common_options.rst55
-rw-r--r--Documentation/tools/rtla/common_options.txt129
-rw-r--r--Documentation/tools/rtla/common_osnoise_description.txt (renamed from Documentation/tools/rtla/common_osnoise_description.rst)0
-rw-r--r--Documentation/tools/rtla/common_osnoise_options.rst27
-rw-r--r--Documentation/tools/rtla/common_osnoise_options.txt39
-rw-r--r--Documentation/tools/rtla/common_timerlat_aa.txt (renamed from Documentation/tools/rtla/common_timerlat_aa.rst)0
-rw-r--r--Documentation/tools/rtla/common_timerlat_description.rst10
-rw-r--r--Documentation/tools/rtla/common_timerlat_description.txt18
-rw-r--r--Documentation/tools/rtla/common_timerlat_options.rst35
-rw-r--r--Documentation/tools/rtla/common_timerlat_options.txt67
-rw-r--r--Documentation/tools/rtla/common_top_options.txt (renamed from Documentation/tools/rtla/common_top_options.rst)0
-rw-r--r--Documentation/tools/rtla/rtla-hwnoise.rst10
-rw-r--r--Documentation/tools/rtla/rtla-osnoise-hist.rst12
-rw-r--r--Documentation/tools/rtla/rtla-osnoise-top.rst12
-rw-r--r--Documentation/tools/rtla/rtla-osnoise.rst4
-rw-r--r--Documentation/tools/rtla/rtla-timerlat-hist.rst14
-rw-r--r--Documentation/tools/rtla/rtla-timerlat-top.rst16
-rw-r--r--Documentation/tools/rtla/rtla-timerlat.rst13
-rw-r--r--Documentation/tools/rtla/rtla.rst2
-rw-r--r--Documentation/tools/rv/index.rst1
-rw-r--r--Documentation/tools/rv/rv-mon-sched.rst69
-rw-r--r--Documentation/tools/rv/rv-mon.rst6
-rw-r--r--Documentation/trace/boottime-trace.rst6
-rw-r--r--Documentation/trace/coresight/coresight-perf.rst31
-rw-r--r--Documentation/trace/coresight/coresight.rst41
-rw-r--r--Documentation/trace/coresight/panic.rst362
-rw-r--r--Documentation/trace/debugging.rst161
-rw-r--r--Documentation/trace/eprobetrace.rst269
-rw-r--r--Documentation/trace/events.rst32
-rw-r--r--Documentation/trace/fprobe.rst44
-rw-r--r--Documentation/trace/fprobetrace.rst31
-rw-r--r--Documentation/trace/ftrace-design.rst24
-rw-r--r--Documentation/trace/ftrace-uses.rst2
-rw-r--r--Documentation/trace/ftrace.rst66
-rw-r--r--Documentation/trace/hisi-ptt.rst4
-rw-r--r--Documentation/trace/histogram-design.rst175
-rw-r--r--Documentation/trace/histogram.rst44
-rw-r--r--Documentation/trace/index.rst97
-rw-r--r--Documentation/trace/kprobes.rst1
-rw-r--r--Documentation/trace/kprobetrace.rst17
-rw-r--r--Documentation/trace/osnoise-tracer.rst2
-rw-r--r--Documentation/trace/postprocess/decode_msr.py2
-rw-r--r--Documentation/trace/ring-buffer-map.rst106
-rw-r--r--Documentation/trace/rv/da_monitor_synthesis.rst147
-rw-r--r--Documentation/trace/rv/index.rst5
-rw-r--r--Documentation/trace/rv/linear_temporal_logic.rst134
-rw-r--r--Documentation/trace/rv/monitor_rtapp.rst133
-rw-r--r--Documentation/trace/rv/monitor_sched.rst402
-rw-r--r--Documentation/trace/rv/monitor_synthesis.rst271
-rw-r--r--Documentation/trace/rv/runtime-verification.rst4
-rw-r--r--Documentation/trace/timerlat-tracer.rst12
-rw-r--r--Documentation/trace/tracepoints.rst19
-rw-r--r--Documentation/trace/user_events.rst27
-rw-r--r--Documentation/translations/it_IT/RCU/index.rst19
-rw-r--r--Documentation/translations/it_IT/RCU/torture.rst369
-rw-r--r--Documentation/translations/it_IT/arch/riscv/patch-acceptance.rst60
-rw-r--r--Documentation/translations/it_IT/core-api/index.rst12
-rw-r--r--Documentation/translations/it_IT/core-api/symbol-namespaces.rst40
-rw-r--r--Documentation/translations/it_IT/dev-tools/clang-format.rst (renamed from Documentation/translations/it_IT/process/clang-format.rst)2
-rw-r--r--Documentation/translations/it_IT/dev-tools/index.rst17
-rw-r--r--Documentation/translations/it_IT/doc-guide/kernel-doc.rst54
-rw-r--r--Documentation/translations/it_IT/doc-guide/parse-headers.rst10
-rw-r--r--Documentation/translations/it_IT/doc-guide/sphinx.rst4
-rw-r--r--Documentation/translations/it_IT/i2c/i2c-protocol.rst99
-rw-r--r--Documentation/translations/it_IT/i2c/index.rst46
-rw-r--r--Documentation/translations/it_IT/i2c/summary.rst84
-rw-r--r--Documentation/translations/it_IT/index.rst12
-rw-r--r--Documentation/translations/it_IT/locking/index.rst20
-rw-r--r--Documentation/translations/it_IT/locking/lockdep-design.rst678
-rw-r--r--Documentation/translations/it_IT/locking/lockstat.rst230
-rw-r--r--Documentation/translations/it_IT/locking/locktorture.rst181
-rw-r--r--Documentation/translations/it_IT/locking/locktypes.rst547
-rw-r--r--Documentation/translations/it_IT/networking/netdev-FAQ.rst13
-rw-r--r--Documentation/translations/it_IT/process/2.Process.rst15
-rw-r--r--Documentation/translations/it_IT/process/4.Coding.rst6
-rw-r--r--Documentation/translations/it_IT/process/5.Posting.rst32
-rw-r--r--Documentation/translations/it_IT/process/6.Followthrough.rst7
-rw-r--r--Documentation/translations/it_IT/process/7.AdvancedTopics.rst19
-rw-r--r--Documentation/translations/it_IT/process/changes.rst89
-rw-r--r--Documentation/translations/it_IT/process/coding-style.rst55
-rw-r--r--Documentation/translations/it_IT/process/deprecated.rst2
-rw-r--r--Documentation/translations/it_IT/process/email-clients.rst41
-rw-r--r--Documentation/translations/it_IT/process/howto.rst16
-rw-r--r--Documentation/translations/it_IT/process/index.rst72
-rw-r--r--Documentation/translations/it_IT/process/magic-number.rst90
-rw-r--r--Documentation/translations/it_IT/process/security-bugs.rst (renamed from Documentation/translations/it_IT/admin-guide/security-bugs.rst)0
-rw-r--r--Documentation/translations/it_IT/process/stable-kernel-rules.rst314
-rw-r--r--Documentation/translations/it_IT/process/submit-checklist.rst168
-rw-r--r--Documentation/translations/it_IT/process/submitting-patches.rst185
-rw-r--r--Documentation/translations/it_IT/riscv/patch-acceptance.rst40
-rw-r--r--Documentation/translations/it_IT/staging/index.rst13
-rw-r--r--Documentation/translations/it_IT/staging/magic-number.rst90
-rw-r--r--Documentation/translations/it_IT/subsystem-apis.rst47
-rw-r--r--Documentation/translations/ja_JP/SubmitChecklist105
-rw-r--r--Documentation/translations/ja_JP/SubmittingPatches28
-rw-r--r--Documentation/translations/ja_JP/disclaimer-ja_JP.rst24
-rw-r--r--Documentation/translations/ja_JP/index.rst4
-rw-r--r--Documentation/translations/ja_JP/process/howto.rst (renamed from Documentation/translations/ja_JP/howto.rst)49
-rw-r--r--Documentation/translations/ja_JP/process/submit-checklist.rst163
-rw-r--r--Documentation/translations/ko_KR/core-api/wrappers/memory-barriers.rst18
-rw-r--r--Documentation/translations/ko_KR/index.rst16
-rw-r--r--Documentation/translations/ko_KR/process/howto.rst (renamed from Documentation/translations/ko_KR/howto.rst)0
-rw-r--r--Documentation/translations/sp_SP/index.rst3
-rw-r--r--Documentation/translations/sp_SP/memory-barriers.txt4
-rw-r--r--Documentation/translations/sp_SP/process/1.Intro.rst302
-rw-r--r--Documentation/translations/sp_SP/process/2.Process.rst543
-rw-r--r--Documentation/translations/sp_SP/process/3.Early-stage.rst241
-rw-r--r--Documentation/translations/sp_SP/process/4.Coding.rst470
-rw-r--r--Documentation/translations/sp_SP/process/5.Posting.rst391
-rw-r--r--Documentation/translations/sp_SP/process/6.Followthrough.rst230
-rw-r--r--Documentation/translations/sp_SP/process/7.AdvancedTopics.rst214
-rw-r--r--Documentation/translations/sp_SP/process/8.Conclusion.rst82
-rw-r--r--Documentation/translations/sp_SP/process/code-of-conduct.rst2
-rw-r--r--Documentation/translations/sp_SP/process/coding-style.rst10
-rw-r--r--Documentation/translations/sp_SP/process/development-process.rst32
-rw-r--r--Documentation/translations/sp_SP/process/email-clients.rst10
-rw-r--r--Documentation/translations/sp_SP/process/embargoed-hardware-issues.rst4
-rw-r--r--Documentation/translations/sp_SP/process/howto.rst12
-rw-r--r--Documentation/translations/sp_SP/process/index.rst2
-rw-r--r--Documentation/translations/sp_SP/process/kernel-docs.rst7
-rw-r--r--Documentation/translations/sp_SP/process/kernel-enforcement-statement.rst2
-rw-r--r--Documentation/translations/sp_SP/process/magic-number.rst4
-rw-r--r--Documentation/translations/sp_SP/process/maintainer-kvm-x86.rst465
-rw-r--r--Documentation/translations/sp_SP/process/programming-language.rst2
-rw-r--r--Documentation/translations/sp_SP/process/researcher-guidelines.rst2
-rw-r--r--Documentation/translations/sp_SP/process/submit-checklist.rst7
-rw-r--r--Documentation/translations/sp_SP/process/submitting-patches.rst43
-rw-r--r--Documentation/translations/sp_SP/scheduler/index.rst10
-rw-r--r--Documentation/translations/sp_SP/scheduler/sched-bwc.rst287
-rw-r--r--Documentation/translations/sp_SP/scheduler/sched-design-CFS.rst277
-rw-r--r--Documentation/translations/sp_SP/scheduler/sched-eevdf.rst58
-rw-r--r--Documentation/translations/zh_CN/PCI/msi-howto.rst2
-rw-r--r--Documentation/translations/zh_CN/PCI/pci.rst2
-rw-r--r--Documentation/translations/zh_CN/PCI/pciebus-howto.rst2
-rw-r--r--Documentation/translations/zh_CN/admin-guide/README.rst6
-rw-r--r--Documentation/translations/zh_CN/admin-guide/bug-hunting.rst13
-rw-r--r--Documentation/translations/zh_CN/admin-guide/index.rst3
-rw-r--r--Documentation/translations/zh_CN/admin-guide/mm/damon/start.rst4
-rw-r--r--Documentation/translations/zh_CN/admin-guide/mm/damon/usage.rst260
-rw-r--r--Documentation/translations/zh_CN/admin-guide/numastat.rst48
-rw-r--r--Documentation/translations/zh_CN/admin-guide/reporting-issues.rst4
-rw-r--r--Documentation/translations/zh_CN/admin-guide/security-bugs.rst74
-rw-r--r--Documentation/translations/zh_CN/admin-guide/sysrq.rst22
-rw-r--r--Documentation/translations/zh_CN/arch/loongarch/irq-chip-model.rst87
-rw-r--r--Documentation/translations/zh_CN/arch/openrisc/openrisc_port.rst12
-rw-r--r--Documentation/translations/zh_CN/block/blk-mq.rst130
-rw-r--r--Documentation/translations/zh_CN/block/data-integrity.rst192
-rw-r--r--Documentation/translations/zh_CN/block/index.rst35
-rw-r--r--Documentation/translations/zh_CN/core-api/cachetlb.rst2
-rw-r--r--Documentation/translations/zh_CN/core-api/index.rst1
-rw-r--r--Documentation/translations/zh_CN/core-api/irq/irq-domain.rst8
-rw-r--r--Documentation/translations/zh_CN/core-api/memory-hotplug.rst3
-rw-r--r--Documentation/translations/zh_CN/core-api/printk-formats.rst3
-rw-r--r--Documentation/translations/zh_CN/core-api/symbol-namespaces.rst49
-rw-r--r--Documentation/translations/zh_CN/core-api/unaligned-memory-access.rst2
-rw-r--r--Documentation/translations/zh_CN/core-api/union_find.rst92
-rw-r--r--Documentation/translations/zh_CN/core-api/workqueue.rst398
-rw-r--r--Documentation/translations/zh_CN/cpu-freq/cpu-drivers.rst3
-rw-r--r--Documentation/translations/zh_CN/dev-tools/gcov.rst8
-rw-r--r--Documentation/translations/zh_CN/dev-tools/gdb-kernel-debugging.rst40
-rw-r--r--Documentation/translations/zh_CN/dev-tools/index.rst12
-rw-r--r--Documentation/translations/zh_CN/dev-tools/kasan.rst56
-rw-r--r--Documentation/translations/zh_CN/dev-tools/kcov.rst359
-rw-r--r--Documentation/translations/zh_CN/dev-tools/kcsan.rst320
-rw-r--r--Documentation/translations/zh_CN/dev-tools/kmemleak.rst229
-rw-r--r--Documentation/translations/zh_CN/dev-tools/kmsan.rst392
-rw-r--r--Documentation/translations/zh_CN/dev-tools/testing-overview.rst2
-rw-r--r--Documentation/translations/zh_CN/dev-tools/ubsan.rst88
-rw-r--r--Documentation/translations/zh_CN/devicetree/of_unittest.rst2
-rw-r--r--Documentation/translations/zh_CN/devicetree/overlay-notes.rst12
-rw-r--r--Documentation/translations/zh_CN/disclaimer-zh_CN.rst8
-rw-r--r--Documentation/translations/zh_CN/doc-guide/checktransupdate.rst55
-rw-r--r--Documentation/translations/zh_CN/doc-guide/contributing.rst2
-rw-r--r--Documentation/translations/zh_CN/doc-guide/index.rst1
-rw-r--r--Documentation/translations/zh_CN/doc-guide/parse-headers.rst8
-rw-r--r--Documentation/translations/zh_CN/doc-guide/sphinx.rst4
-rw-r--r--Documentation/translations/zh_CN/driver-api/gpio/index.rst4
-rw-r--r--Documentation/translations/zh_CN/driver-api/gpio/legacy.rst634
-rw-r--r--Documentation/translations/zh_CN/driver-api/index.rst2
-rw-r--r--Documentation/translations/zh_CN/driver-api/phy/index.rst20
-rw-r--r--Documentation/translations/zh_CN/driver-api/phy/phy.rst212
-rw-r--r--Documentation/translations/zh_CN/filesystems/dnotify.rst67
-rw-r--r--Documentation/translations/zh_CN/filesystems/gfs2-glocks.rst211
-rw-r--r--Documentation/translations/zh_CN/filesystems/gfs2-uevents.rst97
-rw-r--r--Documentation/translations/zh_CN/filesystems/gfs2.rst57
-rw-r--r--Documentation/translations/zh_CN/filesystems/index.rst17
-rw-r--r--Documentation/translations/zh_CN/filesystems/inotify.rst80
-rw-r--r--Documentation/translations/zh_CN/filesystems/sysfs.txt2
-rw-r--r--Documentation/translations/zh_CN/filesystems/ubifs-authentication.rst354
-rw-r--r--Documentation/translations/zh_CN/filesystems/ubifs.rst114
-rw-r--r--Documentation/translations/zh_CN/glossary.rst1
-rw-r--r--Documentation/translations/zh_CN/how-to.rst473
-rw-r--r--Documentation/translations/zh_CN/index.rst18
-rw-r--r--Documentation/translations/zh_CN/kbuild/gcc-plugins.rst126
-rw-r--r--Documentation/translations/zh_CN/kbuild/headers_install.rst39
-rw-r--r--Documentation/translations/zh_CN/kbuild/index.rst36
-rw-r--r--Documentation/translations/zh_CN/kbuild/kbuild.rst325
-rw-r--r--Documentation/translations/zh_CN/kbuild/kconfig.rst259
-rw-r--r--Documentation/translations/zh_CN/kbuild/llvm.rst203
-rw-r--r--Documentation/translations/zh_CN/kbuild/reproducible-builds.rst114
-rw-r--r--Documentation/translations/zh_CN/mm/active_mm.rst7
-rw-r--r--Documentation/translations/zh_CN/mm/balance.rst2
-rw-r--r--Documentation/translations/zh_CN/mm/damon/faq.rst17
-rw-r--r--Documentation/translations/zh_CN/mm/hmm.rst10
-rw-r--r--Documentation/translations/zh_CN/mm/index.rst3
-rw-r--r--Documentation/translations/zh_CN/mm/overcommit-accounting.rst3
-rw-r--r--Documentation/translations/zh_CN/mm/page_frags.rst2
-rw-r--r--Documentation/translations/zh_CN/mm/page_migration.rst6
-rw-r--r--Documentation/translations/zh_CN/mm/page_owner.rst46
-rw-r--r--Documentation/translations/zh_CN/mm/page_table_check.rst13
-rw-r--r--Documentation/translations/zh_CN/mm/page_tables.rst221
-rw-r--r--Documentation/translations/zh_CN/mm/physical_memory.rst356
-rw-r--r--Documentation/translations/zh_CN/mm/z3fold.rst31
-rw-r--r--Documentation/translations/zh_CN/networking/alias.rst56
-rw-r--r--Documentation/translations/zh_CN/networking/generic-hdlc.rst176
-rw-r--r--Documentation/translations/zh_CN/networking/index.rst159
-rw-r--r--Documentation/translations/zh_CN/networking/mptcp-sysctl.rst139
-rw-r--r--Documentation/translations/zh_CN/networking/msg_zerocopy.rst223
-rw-r--r--Documentation/translations/zh_CN/networking/napi.rst362
-rw-r--r--Documentation/translations/zh_CN/networking/netif-msg.rst92
-rw-r--r--Documentation/translations/zh_CN/networking/netmem.rst92
-rw-r--r--Documentation/translations/zh_CN/networking/timestamping.rst674
-rw-r--r--Documentation/translations/zh_CN/networking/vxlan.rst85
-rw-r--r--Documentation/translations/zh_CN/networking/xfrm_proc.rst126
-rw-r--r--Documentation/translations/zh_CN/power/opp.rst2
-rw-r--r--Documentation/translations/zh_CN/process/1.Intro.rst10
-rw-r--r--Documentation/translations/zh_CN/process/2.Process.rst7
-rw-r--r--Documentation/translations/zh_CN/process/4.Coding.rst2
-rw-r--r--Documentation/translations/zh_CN/process/5.Posting.rst15
-rw-r--r--Documentation/translations/zh_CN/process/6.Followthrough.rst5
-rw-r--r--Documentation/translations/zh_CN/process/7.AdvancedTopics.rst14
-rw-r--r--Documentation/translations/zh_CN/process/coding-style.rst17
-rw-r--r--Documentation/translations/zh_CN/process/cve.rst89
-rw-r--r--Documentation/translations/zh_CN/process/email-clients.rst17
-rw-r--r--Documentation/translations/zh_CN/process/embargoed-hardware-issues.rst2
-rw-r--r--Documentation/translations/zh_CN/process/index.rst6
-rw-r--r--Documentation/translations/zh_CN/process/magic-number.rst2
-rw-r--r--Documentation/translations/zh_CN/process/programming-language.rst78
-rw-r--r--Documentation/translations/zh_CN/process/researcher-guidelines.rst129
-rw-r--r--Documentation/translations/zh_CN/process/security-bugs.rst84
-rw-r--r--Documentation/translations/zh_CN/process/submit-checklist.rst4
-rw-r--r--Documentation/translations/zh_CN/process/submitting-patches.rst29
-rw-r--r--Documentation/translations/zh_CN/rust/arch-support.rst14
-rw-r--r--Documentation/translations/zh_CN/rust/coding-guidelines.rst12
-rw-r--r--Documentation/translations/zh_CN/rust/general-information.rst3
-rw-r--r--Documentation/translations/zh_CN/rust/index.rst33
-rw-r--r--Documentation/translations/zh_CN/rust/quick-start.rst50
-rw-r--r--Documentation/translations/zh_CN/rust/testing.rst215
-rw-r--r--Documentation/translations/zh_CN/scheduler/sched-domains.rst10
-rw-r--r--Documentation/translations/zh_CN/scheduler/sched-stats.rst30
-rw-r--r--Documentation/translations/zh_CN/scsi/index.rst92
-rw-r--r--Documentation/translations/zh_CN/scsi/libsas.rst425
-rw-r--r--Documentation/translations/zh_CN/scsi/link_power_management_policy.rst32
-rw-r--r--Documentation/translations/zh_CN/scsi/scsi-parameters.rst118
-rw-r--r--Documentation/translations/zh_CN/scsi/scsi.rst48
-rw-r--r--Documentation/translations/zh_CN/scsi/scsi_eh.rst482
-rw-r--r--Documentation/translations/zh_CN/scsi/scsi_mid_low_api.rst1174
-rw-r--r--Documentation/translations/zh_CN/scsi/sd-parameters.rst38
-rw-r--r--Documentation/translations/zh_CN/scsi/wd719x.rst35
-rw-r--r--Documentation/translations/zh_CN/security/IMA-templates.rst97
-rw-r--r--Documentation/translations/zh_CN/security/SCTP.rst317
-rw-r--r--Documentation/translations/zh_CN/security/credentials.rst479
-rw-r--r--Documentation/translations/zh_CN/security/digsig.rst103
-rw-r--r--Documentation/translations/zh_CN/security/index.rst34
-rw-r--r--Documentation/translations/zh_CN/security/ipe.rst398
-rw-r--r--Documentation/translations/zh_CN/security/keys/index.rst22
-rw-r--r--Documentation/translations/zh_CN/security/landlock.rst123
-rw-r--r--Documentation/translations/zh_CN/security/lsm-development.rst19
-rw-r--r--Documentation/translations/zh_CN/security/lsm.rst95
-rw-r--r--Documentation/translations/zh_CN/security/sak.rst86
-rw-r--r--Documentation/translations/zh_CN/security/secrets/coco.rst96
-rw-r--r--Documentation/translations/zh_CN/security/secrets/index.rst14
-rw-r--r--Documentation/translations/zh_CN/security/self-protection.rst271
-rw-r--r--Documentation/translations/zh_CN/security/siphash.rst195
-rw-r--r--Documentation/translations/zh_CN/security/snp-tdx-threat-model.rst209
-rw-r--r--Documentation/translations/zh_CN/security/tpm/index.rst20
-rw-r--r--Documentation/translations/zh_CN/security/tpm/tpm-security.rst151
-rw-r--r--Documentation/translations/zh_CN/security/tpm/tpm_event_log.rst49
-rw-r--r--Documentation/translations/zh_CN/security/tpm/tpm_ftpm_tee.rst31
-rw-r--r--Documentation/translations/zh_CN/security/tpm/tpm_tis.rst43
-rw-r--r--Documentation/translations/zh_CN/security/tpm/tpm_vtpm_proxy.rst51
-rw-r--r--Documentation/translations/zh_CN/security/tpm/xen-tpmfront.rst114
-rw-r--r--Documentation/translations/zh_CN/staging/index.rst2
-rw-r--r--Documentation/translations/zh_CN/staging/speculation.rst85
-rw-r--r--Documentation/translations/zh_CN/subsystem-apis.rst5
-rw-r--r--Documentation/translations/zh_CN/userspace-api/accelerators/ocxl.rst4
-rw-r--r--Documentation/translations/zh_CN/video4linux/v4l2-framework.txt16
-rw-r--r--Documentation/translations/zh_CN/virt/guest-halt-polling.rst2
-rw-r--r--Documentation/translations/zh_TW/admin-guide/README.rst4
-rw-r--r--Documentation/translations/zh_TW/admin-guide/bug-hunting.rst14
-rw-r--r--Documentation/translations/zh_TW/admin-guide/mm/damon/start.rst4
-rw-r--r--Documentation/translations/zh_TW/admin-guide/mm/damon/usage.rst260
-rw-r--r--Documentation/translations/zh_TW/admin-guide/reporting-issues.rst4
-rw-r--r--Documentation/translations/zh_TW/admin-guide/sysrq.rst22
-rw-r--r--Documentation/translations/zh_TW/arch/openrisc/openrisc_port.rst12
-rw-r--r--Documentation/translations/zh_TW/cpu-freq/cpu-drivers.rst3
-rw-r--r--Documentation/translations/zh_TW/dev-tools/gcov.rst8
-rw-r--r--Documentation/translations/zh_TW/dev-tools/gdb-kernel-debugging.rst36
-rw-r--r--Documentation/translations/zh_TW/dev-tools/kasan.rst39
-rw-r--r--Documentation/translations/zh_TW/filesystems/sysfs.txt2
-rw-r--r--Documentation/translations/zh_TW/gpio.txt591
-rw-r--r--Documentation/translations/zh_TW/process/4.Coding.rst2
-rw-r--r--Documentation/translations/zh_TW/process/5.Posting.rst4
-rw-r--r--Documentation/translations/zh_TW/process/coding-style.rst6
-rw-r--r--Documentation/translations/zh_TW/process/email-clients.rst8
-rw-r--r--Documentation/translations/zh_TW/process/embargoed-hardware-issues.rst2
-rw-r--r--Documentation/translations/zh_TW/process/magic-number.rst2
-rw-r--r--Documentation/translations/zh_TW/process/submit-checklist.rst6
-rw-r--r--Documentation/translations/zh_TW/process/submitting-patches.rst10
-rw-r--r--Documentation/usb/CREDITS2
-rw-r--r--Documentation/usb/functionfs-desc.rst39
-rw-r--r--Documentation/usb/functionfs.rst38
-rw-r--r--Documentation/usb/gadget-testing.rst53
-rw-r--r--Documentation/usb/gadget_configfs.rst45
-rw-r--r--Documentation/usb/index.rst1
-rw-r--r--Documentation/usb/usbip_protocol.rst12
-rw-r--r--Documentation/userspace-api/accelerators/ocxl.rst7
-rw-r--r--Documentation/userspace-api/check_exec.rst144
-rw-r--r--Documentation/userspace-api/dma-buf-heaps.rst67
-rw-r--r--Documentation/userspace-api/fwctl/fwctl-cxl.rst142
-rw-r--r--Documentation/userspace-api/fwctl/fwctl.rst286
-rw-r--r--Documentation/userspace-api/fwctl/index.rst14
-rw-r--r--Documentation/userspace-api/fwctl/pds_fwctl.rst46
-rw-r--r--Documentation/userspace-api/gpio/chardev.rst116
-rw-r--r--Documentation/userspace-api/gpio/chardev_v1.rst131
-rw-r--r--Documentation/userspace-api/gpio/error-codes.rst79
-rw-r--r--Documentation/userspace-api/gpio/gpio-get-chipinfo-ioctl.rst41
-rw-r--r--Documentation/userspace-api/gpio/gpio-get-lineevent-ioctl.rst84
-rw-r--r--Documentation/userspace-api/gpio/gpio-get-linehandle-ioctl.rst125
-rw-r--r--Documentation/userspace-api/gpio/gpio-get-lineinfo-ioctl.rst54
-rw-r--r--Documentation/userspace-api/gpio/gpio-get-lineinfo-unwatch-ioctl.rst49
-rw-r--r--Documentation/userspace-api/gpio/gpio-get-lineinfo-watch-ioctl.rst74
-rw-r--r--Documentation/userspace-api/gpio/gpio-handle-get-line-values-ioctl.rst63
-rw-r--r--Documentation/userspace-api/gpio/gpio-handle-set-config-ioctl.rst66
-rw-r--r--Documentation/userspace-api/gpio/gpio-handle-set-line-values-ioctl.rst55
-rw-r--r--Documentation/userspace-api/gpio/gpio-lineevent-data-read.rst89
-rw-r--r--Documentation/userspace-api/gpio/gpio-lineinfo-changed-read.rst87
-rw-r--r--Documentation/userspace-api/gpio/gpio-v2-get-line-ioctl.rst152
-rw-r--r--Documentation/userspace-api/gpio/gpio-v2-get-lineinfo-ioctl.rst50
-rw-r--r--Documentation/userspace-api/gpio/gpio-v2-get-lineinfo-watch-ioctl.rst67
-rw-r--r--Documentation/userspace-api/gpio/gpio-v2-line-event-read.rst88
-rw-r--r--Documentation/userspace-api/gpio/gpio-v2-line-get-values-ioctl.rst58
-rw-r--r--Documentation/userspace-api/gpio/gpio-v2-line-set-config-ioctl.rst61
-rw-r--r--Documentation/userspace-api/gpio/gpio-v2-line-set-values-ioctl.rst54
-rw-r--r--Documentation/userspace-api/gpio/gpio-v2-lineinfo-changed-read.rst81
-rw-r--r--Documentation/userspace-api/gpio/index.rst18
-rw-r--r--Documentation/userspace-api/gpio/obsolete.rst11
-rw-r--r--Documentation/userspace-api/gpio/sysfs.rst (renamed from Documentation/admin-guide/gpio/sysfs.rst)38
-rw-r--r--Documentation/userspace-api/index.rst56
-rw-r--r--Documentation/userspace-api/ioctl/ioctl-number.rst555
-rw-r--r--Documentation/userspace-api/iommu.rst209
-rw-r--r--Documentation/userspace-api/iommufd.rst255
-rw-r--r--Documentation/userspace-api/landlock.rst281
-rw-r--r--Documentation/userspace-api/liveupdate.rst20
-rw-r--r--Documentation/userspace-api/media/Makefile64
-rw-r--r--Documentation/userspace-api/media/cec/cec-api.rst2
-rw-r--r--Documentation/userspace-api/media/cec/cec-func-open.rst4
-rw-r--r--Documentation/userspace-api/media/cec/cec-header.rst15
-rw-r--r--Documentation/userspace-api/media/cec/cec-ioc-adap-g-caps.rst6
-rw-r--r--Documentation/userspace-api/media/cec/cec-ioc-receive.rst15
-rw-r--r--Documentation/userspace-api/media/cec/cec-pin-error-inj.rst42
-rw-r--r--Documentation/userspace-api/media/cec/cec.h.rst.exceptions (renamed from Documentation/userspace-api/media/cec.h.rst.exceptions)3
-rw-r--r--Documentation/userspace-api/media/dmx.h.rst.exceptions66
-rw-r--r--Documentation/userspace-api/media/drivers/camera-sensor.rst16
-rw-r--r--Documentation/userspace-api/media/drivers/ccs.rst6
-rw-r--r--Documentation/userspace-api/media/drivers/cx2341x-uapi.rst2
-rw-r--r--Documentation/userspace-api/media/drivers/index.rst3
-rw-r--r--Documentation/userspace-api/media/drivers/mali-c55.rst55
-rw-r--r--Documentation/userspace-api/media/drivers/uvcvideo.rst64
-rw-r--r--Documentation/userspace-api/media/drivers/vgxy61.rst (renamed from Documentation/userspace-api/media/drivers/st-vgxy61.rst)0
-rw-r--r--Documentation/userspace-api/media/dvb/ca.h.rst.exceptions (renamed from Documentation/userspace-api/media/ca.h.rst.exceptions)0
-rw-r--r--Documentation/userspace-api/media/dvb/dmx.h.rst.exceptions62
-rw-r--r--Documentation/userspace-api/media/dvb/dmx_types.rst1
-rw-r--r--Documentation/userspace-api/media/dvb/fe-diseqc-send-burst.rst2
-rw-r--r--Documentation/userspace-api/media/dvb/fe-set-tone.rst2
-rw-r--r--Documentation/userspace-api/media/dvb/fe-set-voltage.rst2
-rw-r--r--Documentation/userspace-api/media/dvb/fe_property_parameters.rst23
-rw-r--r--Documentation/userspace-api/media/dvb/frontend-property-terrestrial-systems.rst2
-rw-r--r--Documentation/userspace-api/media/dvb/frontend.h.rst.exceptions (renamed from Documentation/userspace-api/media/frontend.h.rst.exceptions)5
-rw-r--r--Documentation/userspace-api/media/dvb/frontend_f_open.rst2
-rw-r--r--Documentation/userspace-api/media/dvb/headers.rst48
-rw-r--r--Documentation/userspace-api/media/dvb/intro.rst4
-rw-r--r--Documentation/userspace-api/media/dvb/legacy_dvb_apis.rst1
-rw-r--r--Documentation/userspace-api/media/dvb/legacy_dvb_audio.rst1642
-rw-r--r--Documentation/userspace-api/media/dvb/legacy_dvb_decoder_api.rst61
-rw-r--r--Documentation/userspace-api/media/dvb/legacy_dvb_osd.rst883
-rw-r--r--Documentation/userspace-api/media/dvb/legacy_dvb_video.rst2430
-rw-r--r--Documentation/userspace-api/media/dvb/net.h.rst.exceptions (renamed from Documentation/userspace-api/media/net.h.rst.exceptions)0
-rw-r--r--Documentation/userspace-api/media/glossary.rst12
-rw-r--r--Documentation/userspace-api/media/mediactl/media-header.rst15
-rw-r--r--Documentation/userspace-api/media/mediactl/media-types.rst11
-rw-r--r--Documentation/userspace-api/media/mediactl/media.h.rst.exceptions (renamed from Documentation/userspace-api/media/media.h.rst.exceptions)3
-rw-r--r--Documentation/userspace-api/media/rc/lirc-header.rst18
-rw-r--r--Documentation/userspace-api/media/rc/lirc-set-send-duty-cycle.rst2
-rw-r--r--Documentation/userspace-api/media/rc/lirc.h.rst.exceptions (renamed from Documentation/userspace-api/media/lirc.h.rst.exceptions)0
-rw-r--r--Documentation/userspace-api/media/rc/rc-protos.rst4
-rw-r--r--Documentation/userspace-api/media/rc/rc-sysfs-nodes.rst2
-rw-r--r--Documentation/userspace-api/media/v4l/app-pri.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/audio.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/biblio.rst14
-rw-r--r--Documentation/userspace-api/media/v4l/buffer.rst37
-rw-r--r--Documentation/userspace-api/media/v4l/capture-example.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/capture.c.rst7
-rw-r--r--Documentation/userspace-api/media/v4l/colorspaces-defs.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/colorspaces-details.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/colorspaces.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/common-defs.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/common.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/compat.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/control.rst7
-rw-r--r--Documentation/userspace-api/media/v4l/crop.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/depth-formats.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/dev-decoder.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/dev-encoder.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/dev-event.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/dev-mem2mem.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/dev-meta.rst22
-rw-r--r--Documentation/userspace-api/media/v4l/dev-osd.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/dev-overlay.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/dev-radio.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/dev-sdr.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/dev-sliced-vbi.rst4
-rw-r--r--Documentation/userspace-api/media/v4l/dev-stateless-decoder.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/dev-subdev.rst42
-rw-r--r--Documentation/userspace-api/media/v4l/dev-touch.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/devices.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/dv-timings.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/ext-ctrls-camera.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/ext-ctrls-codec-stateless.rst25
-rw-r--r--Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst15
-rw-r--r--Documentation/userspace-api/media/v4l/ext-ctrls-colorimetry.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/ext-ctrls-detect.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/ext-ctrls-dv.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/ext-ctrls-flash.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/ext-ctrls-fm-rx.rst12
-rw-r--r--Documentation/userspace-api/media/v4l/ext-ctrls-fm-tx.rst22
-rw-r--r--Documentation/userspace-api/media/v4l/ext-ctrls-image-process.rst3
-rw-r--r--Documentation/userspace-api/media/v4l/ext-ctrls-image-source.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/ext-ctrls-jpeg.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/ext-ctrls-rf-tuner.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/extended-controls.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/field-order.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/fourcc.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/func-open.rst4
-rw-r--r--Documentation/userspace-api/media/v4l/hsv-formats.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/libv4l.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/meta-formats.rst10
-rw-r--r--Documentation/userspace-api/media/v4l/metafmt-arm-mali-c55.rst84
-rw-r--r--Documentation/userspace-api/media/v4l/metafmt-c3-isp.rst87
-rw-r--r--Documentation/userspace-api/media/v4l/metafmt-d4xx.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/metafmt-generic.rst341
-rw-r--r--Documentation/userspace-api/media/v4l/metafmt-intel-ipu3.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/metafmt-pisp-be.rst57
-rw-r--r--Documentation/userspace-api/media/v4l/metafmt-pisp-fe.rst40
-rw-r--r--Documentation/userspace-api/media/v4l/metafmt-rkisp1.rst58
-rw-r--r--Documentation/userspace-api/media/v4l/metafmt-uvc-msxu-1-5.rst23
-rw-r--r--Documentation/userspace-api/media/v4l/metafmt-uvc.rst5
-rw-r--r--Documentation/userspace-api/media/v4l/metafmt-vivid.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/metafmt-vsp1-hgo.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/metafmt-vsp1-hgt.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/mmap.rst2
-rw-r--r--Documentation/userspace-api/media/v4l/mt2110t.svg315
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-bayer.rst3
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-cnf4.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-compressed.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-indexed.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-intro.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-inzi.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-m420.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-packed-hsv.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-packed-yuv.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-rawnn-cru.rst143
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-reserved.rst14
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-rgb.rst55
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-sdr-cs08.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-sdr-cs14le.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-sdr-cu08.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-sdr-cu16le.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-sdr-pcu16be.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-sdr-pcu18be.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-sdr-pcu20be.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-sdr-ru12le.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-srggb10-ipu3.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-srggb10.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-srggb10alaw8.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-srggb10dpcm8.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-srggb10p.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-srggb12.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-srggb12p.rst5
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-srggb14.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-srggb14p.rst3
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-srggb16.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-srggb8-pisp-comp.rst75
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-srggb8.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-tch-td08.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-tch-td16.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-tch-tu08.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-tch-tu16.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-uv8.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-v4l2-mplane.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-v4l2.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-y12i.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-y16i.rst74
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-y8i.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-yuv-luma.rst57
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-yuv-planar.rst310
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt-z16.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/pixfmt.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/planar-apis.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/querycap.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/sdr-formats.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/selection-api-configuration.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/selection-api-examples.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/selection-api-intro.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/selection-api-targets.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/selection-api-vs-crop-api.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/selection-api.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/selections-common.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/standard.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/subdev-formats.rst846
-rw-r--r--Documentation/userspace-api/media/v4l/tch-formats.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/tuner.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/user-func.rst2
-rw-r--r--Documentation/userspace-api/media/v4l/v4l2-isp.rst67
-rw-r--r--Documentation/userspace-api/media/v4l/v4l2-selection-flags.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/v4l2-selection-targets.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/v4l2.rst3
-rw-r--r--Documentation/userspace-api/media/v4l/v4l2grab-example.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/v4l2grab.c.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/video.rst1
-rw-r--r--Documentation/userspace-api/media/v4l/videodev.rst13
-rw-r--r--Documentation/userspace-api/media/v4l/videodev2.h.rst.exceptions614
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-enum-fmt.rst25
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-g-ext-ctrls.rst26
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-querycap.rst11
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-queryctrl.rst22
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-remove-bufs.rst86
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-reqbufs.rst41
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-subdev-g-client-cap.rst15
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-subdev-g-crop.rst6
-rw-r--r--Documentation/userspace-api/media/v4l/vidioc-subdev-g-routing.rst51
-rw-r--r--Documentation/userspace-api/media/v4l/yuv-formats.rst2
-rw-r--r--Documentation/userspace-api/media/videodev2.h.rst.exceptions603
-rw-r--r--Documentation/userspace-api/mfd_noexec.rst86
-rw-r--r--Documentation/userspace-api/mseal.rst209
-rw-r--r--Documentation/userspace-api/netlink/c-code-gen.rst4
-rw-r--r--Documentation/userspace-api/netlink/genetlink-legacy.rst22
-rw-r--r--Documentation/userspace-api/netlink/index.rst2
-rw-r--r--Documentation/userspace-api/netlink/intro-specs.rst12
-rw-r--r--Documentation/userspace-api/netlink/netlink-raw.rst46
-rw-r--r--Documentation/userspace-api/netlink/specs.rst2
-rw-r--r--Documentation/userspace-api/ntsync.rst385
-rw-r--r--Documentation/userspace-api/perf_ring_buffer.rst830
-rw-r--r--Documentation/userspace-api/spec_ctrl.rst6
-rw-r--r--Documentation/userspace-api/sysfs-platform_profile.rst42
-rw-r--r--Documentation/virt/coco/sev-guest.rst82
-rw-r--r--Documentation/virt/hyperv/clocks.rst21
-rw-r--r--Documentation/virt/hyperv/coco.rst397
-rw-r--r--Documentation/virt/hyperv/hibernation.rst336
-rw-r--r--Documentation/virt/hyperv/index.rst3
-rw-r--r--Documentation/virt/hyperv/overview.rst22
-rw-r--r--Documentation/virt/hyperv/vmbus.rst171
-rw-r--r--Documentation/virt/hyperv/vpci.rst316
-rw-r--r--Documentation/virt/kvm/api.rst1499
-rw-r--r--Documentation/virt/kvm/arm/fw-pseudo-registers.rst151
-rw-r--r--Documentation/virt/kvm/arm/hypercalls.rst337
-rw-r--r--Documentation/virt/kvm/arm/index.rst1
-rw-r--r--Documentation/virt/kvm/arm/ptp_kvm.rst38
-rw-r--r--Documentation/virt/kvm/devices/arm-vgic-its.rst5
-rw-r--r--Documentation/virt/kvm/devices/arm-vgic-v3.rst92
-rw-r--r--Documentation/virt/kvm/devices/arm-vgic.rst2
-rw-r--r--Documentation/virt/kvm/devices/s390_flic.rst4
-rw-r--r--Documentation/virt/kvm/devices/vcpu.rst38
-rw-r--r--Documentation/virt/kvm/halt-polling.rst12
-rw-r--r--Documentation/virt/kvm/index.rst1
-rw-r--r--Documentation/virt/kvm/locking.rst116
-rw-r--r--Documentation/virt/kvm/loongarch/hypercalls.rst89
-rw-r--r--Documentation/virt/kvm/loongarch/index.rst10
-rw-r--r--Documentation/virt/kvm/review-checklist.rst95
-rw-r--r--Documentation/virt/kvm/s390/s390-diag.rst35
-rw-r--r--Documentation/virt/kvm/x86/amd-memory-encryption.rst211
-rw-r--r--Documentation/virt/kvm/x86/errata.rst39
-rw-r--r--Documentation/virt/kvm/x86/hypercalls.rst6
-rw-r--r--Documentation/virt/kvm/x86/index.rst1
-rw-r--r--Documentation/virt/kvm/x86/intel-tdx.rst268
-rw-r--r--Documentation/virt/kvm/x86/msr.rst19
-rw-r--r--Documentation/virt/uml/user_mode_linux_howto_v2.rst80
-rw-r--r--Documentation/w1/masters/ds2482.rst2
-rw-r--r--Documentation/w1/masters/index.rst3
-rw-r--r--Documentation/w1/masters/w1-uart.rst54
-rw-r--r--Documentation/w1/slaves/index.rst2
-rw-r--r--Documentation/w1/w1-netlink.rst2
-rw-r--r--Documentation/watchdog/convert_drivers_to_kernel_api.rst1
-rw-r--r--Documentation/watchdog/watchdog-api.rst2
-rw-r--r--Documentation/watchdog/watchdog-parameters.rst10
-rw-r--r--Documentation/wmi/acpi-interface.rst20
-rw-r--r--Documentation/wmi/devices/alienware-wmi.rst322
-rw-r--r--Documentation/wmi/devices/dell-wmi-ddv.rst50
-rw-r--r--Documentation/wmi/devices/lenovo-wmi-gamezone.rst202
-rw-r--r--Documentation/wmi/devices/lenovo-wmi-other.rst108
-rw-r--r--Documentation/wmi/devices/msi-wmi-platform.rst198
-rw-r--r--Documentation/wmi/devices/uniwill-laptop.rst198
-rw-r--r--Documentation/wmi/driver-development-guide.rst192
-rw-r--r--Documentation/wmi/index.rst1
6298 files changed, 328744 insertions, 91067 deletions
diff --git a/Documentation/.renames.txt b/Documentation/.renames.txt
new file mode 100644
index 000000000000..c0bd5d3dc8b9
--- /dev/null
+++ b/Documentation/.renames.txt
@@ -0,0 +1,1191 @@
+80211/cfg80211 driver-api/80211/cfg80211
+80211/index driver-api/80211/index
+80211/introduction driver-api/80211/introduction
+80211/mac80211 driver-api/80211/mac80211
+80211/mac80211-advanced driver-api/80211/mac80211-advanced
+EDID/howto admin-guide/edid
+PCI/picebus-howto PCI/pciebus-howto
+RAS/address-translation admin-guide/RAS/address-translation
+RAS/error-decoding admin-guide/RAS/error-decoding
+RAS/ras admin-guide/RAS/error-decoding
+accelerators/ocxl userspace-api/accelerators/ocxl
+admin-guide/gpio/sysfs userspace-api/gpio/sysfs
+admin-guide/l1tf admin-guide/hw-vuln/l1tf
+admin-guide/media/v4l-with-ir admin-guide/media/remote-controller
+admin-guide/ras admin-guide/RAS/main
+admin-guide/security-bugs process/security-bugs
+aoe/aoe admin-guide/aoe/aoe
+aoe/examples admin-guide/aoe/examples
+aoe/index admin-guide/aoe/index
+aoe/todo admin-guide/aoe/todo
+arc/arc arch/arc/arc
+arc/features arch/arc/features
+arc/index arch/arc/index
+arch/x86/resctrl filesystems/resctrl
+arm/arm arch/arm/arm
+arm/booting arch/arm/booting
+arm/cluster-pm-race-avoidance arch/arm/cluster-pm-race-avoidance
+arm/features arch/arm/features
+arm/firmware arch/arm/firmware
+arm/google/chromebook-boot-flow arch/arm/google/chromebook-boot-flow
+arm/index arch/arm/index
+arm/interrupts arch/arm/interrupts
+arm/ixp4xx arch/arm/ixp4xx
+arm/kernel_mode_neon arch/arm/kernel_mode_neon
+arm/kernel_user_helpers arch/arm/kernel_user_helpers
+arm/keystone/knav-qmss arch/arm/keystone/knav-qmss
+arm/keystone/overview arch/arm/keystone/overview
+arm/marvel arch/arm/marvell
+arm/marvell arch/arm/marvell
+arm/mem_alignment arch/arm/mem_alignment
+arm/memory arch/arm/memory
+arm/microchip arch/arm/microchip
+arm/netwinder arch/arm/netwinder
+arm/nwfpe/index arch/arm/nwfpe/index
+arm/nwfpe/netwinder-fpe arch/arm/nwfpe/netwinder-fpe
+arm/nwfpe/notes arch/arm/nwfpe/notes
+arm/nwfpe/nwfpe arch/arm/nwfpe/nwfpe
+arm/nwfpe/todo arch/arm/nwfpe/todo
+arm/omap/dss arch/arm/omap/dss
+arm/omap/index arch/arm/omap/index
+arm/omap/omap arch/arm/omap/omap
+arm/omap/omap_pm arch/arm/omap/omap_pm
+arm/porting arch/arm/porting
+arm/pxa/mfp arch/arm/pxa/mfp
+arm/sa1100/assabet arch/arm/sa1100/assabet
+arm/sa1100/cerf arch/arm/sa1100/cerf
+arm/sa1100/index arch/arm/sa1100/index
+arm/sa1100/lart arch/arm/sa1100/lart
+arm/sa1100/serial_uart arch/arm/sa1100/serial_uart
+arm/samsung/bootloader-interface arch/arm/samsung/bootloader-interface
+arm/samsung/gpio arch/arm/samsung/gpio
+arm/samsung/index arch/arm/samsung/index
+arm/samsung/overview arch/arm/samsung/overview
+arm/setup arch/arm/setup
+arm/spear/overview arch/arm/spear/overview
+arm/sti/overview arch/arm/sti/overview
+arm/sti/stih407-overview arch/arm/sti/stih407-overview
+arm/sti/stih418-overview arch/arm/sti/stih418-overview
+arm/stm32/overview arch/arm/stm32/overview
+arm/stm32/stm32-dma-mdma-chaining arch/arm/stm32/stm32-dma-mdma-chaining
+arm/stm32/stm32f429-overview arch/arm/stm32/stm32f429-overview
+arm/stm32/stm32f746-overview arch/arm/stm32/stm32f746-overview
+arm/stm32/stm32f769-overview arch/arm/stm32/stm32f769-overview
+arm/stm32/stm32h743-overview arch/arm/stm32/stm32h743-overview
+arm/stm32/stm32h750-overview arch/arm/stm32/stm32h750-overview
+arm/stm32/stm32mp13-overview arch/arm/stm32/stm32mp13-overview
+arm/stm32/stm32mp151-overview arch/arm/stm32/stm32mp151-overview
+arm/stm32/stm32mp157-overview arch/arm/stm32/stm32mp157-overview
+arm/sunxi arch/arm/sunxi
+arm/sunxi/clocks arch/arm/sunxi/clocks
+arm/swp_emulation arch/arm/swp_emulation
+arm/tcm arch/arm/tcm
+arm/uefi arch/arm/uefi
+arm/vfp/release-notes arch/arm/vfp/release-notes
+arm/vlocks arch/arm/vlocks
+arm64/acpi_object_usage arch/arm64/acpi_object_usage
+arm64/amu arch/arm64/amu
+arm64/arm-acpi arch/arm64/arm-acpi
+arm64/asymmetric-32bit arch/arm64/asymmetric-32bit
+arm64/booting arch/arm64/booting
+arm64/cpu-feature-registers arch/arm64/cpu-feature-registers
+arm64/elf_hwcaps arch/arm64/elf_hwcaps
+arm64/features arch/arm64/features
+arm64/hugetlbpage arch/arm64/hugetlbpage
+arm64/index arch/arm64/index
+arm64/legacy_instructions arch/arm64/legacy_instructions
+arm64/memory arch/arm64/memory
+arm64/memory-tagging-extension arch/arm64/memory-tagging-extension
+arm64/perf arch/arm64/perf
+arm64/pointer-authentication arch/arm64/pointer-authentication
+arm64/silicon-errata arch/arm64/silicon-errata
+arm64/sme arch/arm64/sme
+arm64/sve arch/arm64/sve
+arm64/tagged-address-abi arch/arm64/tagged-address-abi
+arm64/tagged-pointers arch/arm64/tagged-pointers
+asm-annotations core-api/asm-annotations
+auxdisplay/lcd-panel-cgram admin-guide/lcd-panel-cgram
+backlight/lp855x-driver driver-api/backlight/lp855x-driver
+blockdev/drbd/data-structure-v9 admin-guide/blockdev/drbd/data-structure-v9
+blockdev/drbd/figures admin-guide/blockdev/drbd/figures
+blockdev/drbd/index admin-guide/blockdev/drbd/index
+blockdev/floppy admin-guide/blockdev/floppy
+blockdev/index admin-guide/blockdev/index
+blockdev/nbd admin-guide/blockdev/nbd
+blockdev/paride admin-guide/blockdev/paride
+blockdev/ramdisk admin-guide/blockdev/ramdisk
+blockdev/zram admin-guide/blockdev/zram
+bpf/README bpf/index
+bpf/bpf_lsm bpf/prog_lsm
+bpf/instruction-set bpf/standardization/instruction-set
+bpf/libbpf/libbpf bpf/libbpf/index
+bpf/standardization/linux-notes bpf/linux-notes
+bus-devices/ti-gpmc driver-api/memory-devices/ti-gpmc
+cgroup-v1/blkio-controller admin-guide/cgroup-v1/blkio-controller
+cgroup-v1/cgroups admin-guide/cgroup-v1/cgroups
+cgroup-v1/cpuacct admin-guide/cgroup-v1/cpuacct
+cgroup-v1/cpusets admin-guide/cgroup-v1/cpusets
+cgroup-v1/devices admin-guide/cgroup-v1/devices
+cgroup-v1/freezer-subsystem admin-guide/cgroup-v1/freezer-subsystem
+cgroup-v1/hugetlb admin-guide/cgroup-v1/hugetlb
+cgroup-v1/index admin-guide/cgroup-v1/index
+cgroup-v1/memcg_test admin-guide/cgroup-v1/memcg_test
+cgroup-v1/memory admin-guide/cgroup-v1/memory
+cgroup-v1/net_cls admin-guide/cgroup-v1/net_cls
+cgroup-v1/net_prio admin-guide/cgroup-v1/net_prio
+cgroup-v1/pids admin-guide/cgroup-v1/pids
+cgroup-v1/rdma admin-guide/cgroup-v1/rdma
+cma/debugfs admin-guide/mm/cma_debugfs
+connector/connector driver-api/connector
+console/console driver-api/console
+core-api/gcc-plugins kbuild/gcc-plugins
+core-api/ioctl driver-api/ioctl
+core-api/memory-hotplug-notifier core-api/memory-hotplug
+dev-tools/gdb-kernel-debugging process/debugging/gdb-kernel-debugging
+dev-tools/kgdb process/debugging/kgdb
+dev-tools/tools dev-tools/index
+development-process/1.Intro process/1.Intro
+development-process/2.Process process/2.Process
+development-process/3.Early-stage process/3.Early-stage
+development-process/4.Coding process/4.Coding
+development-process/5.Posting process/5.Posting
+development-process/6.Followthrough process/6.Followthrough
+development-process/7.AdvancedTopics process/7.AdvancedTopics
+development-process/8.Conclusion process/8.Conclusion
+development-process/development-process process/development-process
+development-process/index process/index
+device-mapper/cache admin-guide/device-mapper/cache
+device-mapper/cache-policies admin-guide/device-mapper/cache-policies
+device-mapper/delay admin-guide/device-mapper/delay
+device-mapper/dm-crypt admin-guide/device-mapper/dm-crypt
+device-mapper/dm-flakey admin-guide/device-mapper/dm-flakey
+device-mapper/dm-init admin-guide/device-mapper/dm-init
+device-mapper/dm-integrity admin-guide/device-mapper/dm-integrity
+device-mapper/dm-io admin-guide/device-mapper/dm-io
+device-mapper/dm-log admin-guide/device-mapper/dm-log
+device-mapper/dm-queue-length admin-guide/device-mapper/dm-queue-length
+device-mapper/dm-raid admin-guide/device-mapper/dm-raid
+device-mapper/dm-service-time admin-guide/device-mapper/dm-service-time
+device-mapper/dm-uevent admin-guide/device-mapper/dm-uevent
+device-mapper/dm-zoned admin-guide/device-mapper/dm-zoned
+device-mapper/era admin-guide/device-mapper/era
+device-mapper/index admin-guide/device-mapper/index
+device-mapper/kcopyd admin-guide/device-mapper/kcopyd
+device-mapper/linear admin-guide/device-mapper/linear
+device-mapper/log-writes admin-guide/device-mapper/log-writes
+device-mapper/persistent-data admin-guide/device-mapper/persistent-data
+device-mapper/snapshot admin-guide/device-mapper/snapshot
+device-mapper/statistics admin-guide/device-mapper/statistics
+device-mapper/striped admin-guide/device-mapper/striped
+device-mapper/switch admin-guide/device-mapper/switch
+device-mapper/thin-provisioning admin-guide/device-mapper/thin-provisioning
+device-mapper/unstriped admin-guide/device-mapper/unstriped
+device-mapper/verity admin-guide/device-mapper/verity
+device-mapper/writecache admin-guide/device-mapper/writecache
+device-mapper/zero admin-guide/device-mapper/zero
+devicetree/writing-schema devicetree/bindings/writing-schema
+driver-api/bt8xxgpio driver-api/gpio/bt8xxgpio
+driver-api/cxl/access-coordinates driver-api/cxl/linux/access-coordinates
+driver-api/cxl/memory-devices driver-api/cxl/theory-of-operation
+driver-api/dcdbas userspace-api/dcdbas
+driver-api/dell_rbu admin-guide/dell_rbu
+driver-api/edid admin-guide/edid
+driver-api/gpio driver-api/gpio/index
+driver-api/hte/tegra194-hte driver-api/hte/tegra-hte
+driver-api/isapnp userspace-api/isapnp
+driver-api/media/drivers/v4l-drivers/zoran driver-api/media/drivers/zoran
+driver-api/mtd/intel-spi driver-api/mtd/spi-intel
+driver-api/pci driver-api/pci/pci
+driver-api/pinctl driver-api/pin-control
+driver-api/rapidio admin-guide/rapidio
+driver-api/serial/moxa-smartio driver-api/tty/moxa-smartio
+driver-api/serial/n_gsm driver-api/tty/n_gsm
+driver-api/serial/tty driver-api/tty/tty_ldisc
+driver-api/thermal/intel_powerclamp admin-guide/thermal/intel_powerclamp
+driver-api/usb driver-api/usb/usb
+driver-model/binding driver-api/driver-model/binding
+driver-model/bus driver-api/driver-model/bus
+driver-model/design-patterns driver-api/driver-model/design-patterns
+driver-model/device driver-api/driver-model/device
+driver-model/devres driver-api/driver-model/devres
+driver-model/driver driver-api/driver-model/driver
+driver-model/index driver-api/driver-model/index
+driver-model/overview driver-api/driver-model/overview
+driver-model/platform driver-api/driver-model/platform
+driver-model/porting driver-api/driver-model/porting
+early-userspace/buffer-format driver-api/early-userspace/buffer-format
+early-userspace/early_userspace_support driver-api/early-userspace/early_userspace_support
+early-userspace/index driver-api/early-userspace/index
+errseq core-api/errseq
+filesystems/binderfs admin-guide/binderfs
+filesystems/cifs/cifsd filesystems/smb/ksmbd
+filesystems/cifs/cifsroot filesystems/smb/cifsroot
+filesystems/cifs/index filesystems/smb/index
+filesystems/cifs/ksmbd filesystems/smb/ksmbd
+filesystems/ext4/ext4 admin-guide/ext4
+filesystems/ext4/ondisk/about filesystems/ext4/about
+filesystems/ext4/ondisk/allocators filesystems/ext4/allocators
+filesystems/ext4/ondisk/attributes filesystems/ext4/attributes
+filesystems/ext4/ondisk/bigalloc filesystems/ext4/bigalloc
+filesystems/ext4/ondisk/bitmaps filesystems/ext4/bitmaps
+filesystems/ext4/ondisk/blockgroup filesystems/ext4/blockgroup
+filesystems/ext4/ondisk/blockmap filesystems/ext4/blockmap
+filesystems/ext4/ondisk/blocks filesystems/ext4/blocks
+filesystems/ext4/ondisk/checksums filesystems/ext4/checksums
+filesystems/ext4/ondisk/directory filesystems/ext4/directory
+filesystems/ext4/ondisk/dynamic filesystems/ext4/dynamic
+filesystems/ext4/ondisk/eainode filesystems/ext4/eainode
+filesystems/ext4/ondisk/globals filesystems/ext4/globals
+filesystems/ext4/ondisk/group_descr filesystems/ext4/group_descr
+filesystems/ext4/ondisk/ifork filesystems/ext4/ifork
+filesystems/ext4/ondisk/inlinedata filesystems/ext4/inlinedata
+filesystems/ext4/ondisk/inodes filesystems/ext4/inodes
+filesystems/ext4/ondisk/journal filesystems/ext4/journal
+filesystems/ext4/ondisk/mmp filesystems/ext4/mmp
+filesystems/ext4/ondisk/overview filesystems/ext4/overview
+filesystems/ext4/ondisk/special_inodes filesystems/ext4/special_inodes
+filesystems/ext4/ondisk/super filesystems/ext4/super
+filesystems/sysfs-pci PCI/sysfs-pci
+filesystems/sysfs-tagging networking/sysfs-tagging
+filesystems/xfs-delayed-logging-design filesystems/xfs/xfs-delayed-logging-design
+filesystems/xfs-maintainer-entry-profile filesystems/xfs/xfs-maintainer-entry-profile
+filesystems/xfs-online-fsck-design filesystems/xfs/xfs-online-fsck-design
+filesystems/xfs-self-describing-metadata filesystems/xfs/xfs-self-describing-metadata
+gpio/index admin-guide/gpio/index
+gpio/sysfs userspace-api/gpio/sysfs
+gpu/amdgpu gpu/amdgpu/index
+hte/hte driver-api/hte/hte
+hte/index driver-api/hte/index
+hte/tegra194-hte driver-api/hte/tegra-hte
+input/alps input/devices/alps
+input/amijoy input/devices/amijoy
+input/appletouch input/devices/appletouch
+input/atarikbd input/devices/atarikbd
+input/bcm5974 input/devices/bcm5974
+input/cma3000_d0x input/devices/cma3000_d0x
+input/cs461x input/devices/cs461x
+input/edt-ft5x06 input/devices/edt-ft5x06
+input/elantech input/devices/elantech
+input/iforce-protocol input/devices/iforce-protocol
+input/joystick input/joydev/joystick
+input/joystick-api input/joydev/joystick-api
+input/joystick-parport input/devices/joystick-parport
+input/ntrig input/devices/ntrig
+input/rotary-encoder input/devices/rotary-encoder
+input/sentelic input/devices/sentelic
+input/walkera0701 input/devices/walkera0701
+input/xpad input/devices/xpad
+input/yealink input/devices/yealink
+interconnect/interconnect driver-api/interconnect
+ioctl/botching-up-ioctls process/botching-up-ioctls
+ioctl/cdrom userspace-api/ioctl/cdrom
+ioctl/hdio userspace-api/ioctl/hdio
+ioctl/index userspace-api/ioctl/index
+ioctl/ioctl-decoding userspace-api/ioctl/ioctl-decoding
+ioctl/ioctl-number userspace-api/ioctl/ioctl-number
+kbuild/namespaces core-api/symbol-namespaces
+kdump/index admin-guide/kdump/index
+kdump/kdump admin-guide/kdump/kdump
+kdump/vmcoreinfo admin-guide/kdump/vmcoreinfo
+kernel-documentation doc-guide/kernel-doc
+laptops/asus-laptop admin-guide/laptops/asus-laptop
+laptops/disk-shock-protection admin-guide/laptops/disk-shock-protection
+laptops/index admin-guide/laptops/index
+laptops/laptop-mode admin-guide/laptops/laptop-mode
+laptops/lg-laptop admin-guide/laptops/lg-laptop
+laptops/sony-laptop admin-guide/laptops/sony-laptop
+laptops/sonypi admin-guide/laptops/sonypi
+laptops/thinkpad-acpi admin-guide/laptops/thinkpad-acpi
+laptops/toshiba_haps admin-guide/laptops/toshiba_haps
+loongarch/booting arch/loongarch/booting
+loongarch/features arch/loongarch/features
+loongarch/index arch/loongarch/index
+loongarch/introduction arch/loongarch/introduction
+loongarch/irq-chip-model arch/loongarch/irq-chip-model
+m68k/buddha-driver arch/m68k/buddha-driver
+m68k/features arch/m68k/features
+m68k/index arch/m68k/index
+m68k/kernel-options arch/m68k/kernel-options
+md/index driver-api/md/index
+md/md-cluster driver-api/md/md-cluster
+md/raid5-cache driver-api/md/raid5-cache
+md/raid5-ppl driver-api/md/raid5-ppl
+media/dvb-drivers/avermedia admin-guide/media/avermedia
+media/dvb-drivers/bt8xx admin-guide/media/bt8xx
+media/dvb-drivers/ci admin-guide/media/ci
+media/dvb-drivers/contributors driver-api/media/drivers/contributors
+media/dvb-drivers/dvb-usb driver-api/media/drivers/dvb-usb
+media/dvb-drivers/faq admin-guide/media/faq
+media/dvb-drivers/frontends driver-api/media/drivers/frontends
+media/dvb-drivers/index driver-api/media/drivers/index
+media/dvb-drivers/lmedm04 admin-guide/media/lmedm04
+media/dvb-drivers/opera-firmware admin-guide/media/opera-firmware
+media/dvb-drivers/technisat admin-guide/media/technisat
+media/dvb-drivers/ttusb-dec admin-guide/media/ttusb-dec
+media/intro userspace-api/media/intro
+media/kapi/cec-core driver-api/media/cec-core
+media/kapi/dtv-ca driver-api/media/dtv-ca
+media/kapi/dtv-common driver-api/media/dtv-common
+media/kapi/dtv-core driver-api/media/dtv-core
+media/kapi/dtv-demux driver-api/media/dtv-demux
+media/kapi/dtv-frontend driver-api/media/dtv-frontend
+media/kapi/dtv-net driver-api/media/dtv-net
+media/kapi/mc-core driver-api/media/mc-core
+media/kapi/rc-core driver-api/media/rc-core
+media/kapi/v4l2-async driver-api/media/v4l2-async
+media/kapi/v4l2-common driver-api/media/v4l2-common
+media/kapi/v4l2-controls driver-api/media/v4l2-controls
+media/kapi/v4l2-core driver-api/media/v4l2-core
+media/kapi/v4l2-dev driver-api/media/v4l2-dev
+media/kapi/v4l2-device driver-api/media/v4l2-device
+media/kapi/v4l2-dv-timings driver-api/media/v4l2-dv-timings
+media/kapi/v4l2-event driver-api/media/v4l2-event
+media/kapi/v4l2-fh driver-api/media/v4l2-fh
+media/kapi/v4l2-flash-led-class driver-api/media/v4l2-flash-led-class
+media/kapi/v4l2-fwnode driver-api/media/v4l2-fwnode
+media/kapi/v4l2-intro driver-api/media/v4l2-intro
+media/kapi/v4l2-mc driver-api/media/v4l2-mc
+media/kapi/v4l2-mediabus driver-api/media/v4l2-mediabus
+media/kapi/v4l2-mem2mem driver-api/media/v4l2-mem2mem
+media/kapi/v4l2-rect driver-api/media/v4l2-rect
+media/kapi/v4l2-subdev driver-api/media/v4l2-subdev
+media/kapi/v4l2-tuner driver-api/media/v4l2-tuner
+media/kapi/v4l2-tveeprom driver-api/media/v4l2-tveeprom
+media/kapi/v4l2-videobuf2 driver-api/media/v4l2-videobuf2
+media/media_kapi driver-api/media/index
+media/media_uapi userspace-api/media/index
+media/uapi/cec/cec-api userspace-api/media/cec/cec-api
+media/uapi/cec/cec-func-close userspace-api/media/cec/cec-func-close
+media/uapi/cec/cec-func-ioctl userspace-api/media/cec/cec-func-ioctl
+media/uapi/cec/cec-func-open userspace-api/media/cec/cec-func-open
+media/uapi/cec/cec-func-poll userspace-api/media/cec/cec-func-poll
+media/uapi/cec/cec-funcs userspace-api/media/cec/cec-funcs
+media/uapi/cec/cec-header userspace-api/media/cec/cec-header
+media/uapi/cec/cec-intro userspace-api/media/cec/cec-intro
+media/uapi/cec/cec-ioc-adap-g-caps userspace-api/media/cec/cec-ioc-adap-g-caps
+media/uapi/cec/cec-ioc-adap-g-conn-info userspace-api/media/cec/cec-ioc-adap-g-conn-info
+media/uapi/cec/cec-ioc-adap-g-log-addrs userspace-api/media/cec/cec-ioc-adap-g-log-addrs
+media/uapi/cec/cec-ioc-adap-g-phys-addr userspace-api/media/cec/cec-ioc-adap-g-phys-addr
+media/uapi/cec/cec-ioc-dqevent userspace-api/media/cec/cec-ioc-dqevent
+media/uapi/cec/cec-ioc-g-mode userspace-api/media/cec/cec-ioc-g-mode
+media/uapi/cec/cec-ioc-receive userspace-api/media/cec/cec-ioc-receive
+media/uapi/cec/cec-pin-error-inj userspace-api/media/cec/cec-pin-error-inj
+media/uapi/dvb/ca userspace-api/media/dvb/ca
+media/uapi/dvb/ca-fclose userspace-api/media/dvb/ca-fclose
+media/uapi/dvb/ca-fopen userspace-api/media/dvb/ca-fopen
+media/uapi/dvb/ca-get-cap userspace-api/media/dvb/ca-get-cap
+media/uapi/dvb/ca-get-descr-info userspace-api/media/dvb/ca-get-descr-info
+media/uapi/dvb/ca-get-msg userspace-api/media/dvb/ca-get-msg
+media/uapi/dvb/ca-get-slot-info userspace-api/media/dvb/ca-get-slot-info
+media/uapi/dvb/ca-reset userspace-api/media/dvb/ca-reset
+media/uapi/dvb/ca-send-msg userspace-api/media/dvb/ca-send-msg
+media/uapi/dvb/ca-set-descr userspace-api/media/dvb/ca-set-descr
+media/uapi/dvb/ca_data_types userspace-api/media/dvb/ca_data_types
+media/uapi/dvb/ca_function_calls userspace-api/media/dvb/ca_function_calls
+media/uapi/dvb/ca_high_level userspace-api/media/dvb/ca_high_level
+media/uapi/dvb/demux userspace-api/media/dvb/demux
+media/uapi/dvb/dmx-add-pid userspace-api/media/dvb/dmx-add-pid
+media/uapi/dvb/dmx-expbuf userspace-api/media/dvb/dmx-expbuf
+media/uapi/dvb/dmx-fclose userspace-api/media/dvb/dmx-fclose
+media/uapi/dvb/dmx-fopen userspace-api/media/dvb/dmx-fopen
+media/uapi/dvb/dmx-fread userspace-api/media/dvb/dmx-fread
+media/uapi/dvb/dmx-fwrite userspace-api/media/dvb/dmx-fwrite
+media/uapi/dvb/dmx-get-pes-pids userspace-api/media/dvb/dmx-get-pes-pids
+media/uapi/dvb/dmx-get-stc userspace-api/media/dvb/dmx-get-stc
+media/uapi/dvb/dmx-mmap userspace-api/media/dvb/dmx-mmap
+media/uapi/dvb/dmx-munmap userspace-api/media/dvb/dmx-munmap
+media/uapi/dvb/dmx-qbuf userspace-api/media/dvb/dmx-qbuf
+media/uapi/dvb/dmx-querybuf userspace-api/media/dvb/dmx-querybuf
+media/uapi/dvb/dmx-remove-pid userspace-api/media/dvb/dmx-remove-pid
+media/uapi/dvb/dmx-reqbufs userspace-api/media/dvb/dmx-reqbufs
+media/uapi/dvb/dmx-set-buffer-size userspace-api/media/dvb/dmx-set-buffer-size
+media/uapi/dvb/dmx-set-filter userspace-api/media/dvb/dmx-set-filter
+media/uapi/dvb/dmx-set-pes-filter userspace-api/media/dvb/dmx-set-pes-filter
+media/uapi/dvb/dmx-start userspace-api/media/dvb/dmx-start
+media/uapi/dvb/dmx-stop userspace-api/media/dvb/dmx-stop
+media/uapi/dvb/dmx_fcalls userspace-api/media/dvb/dmx_fcalls
+media/uapi/dvb/dmx_types userspace-api/media/dvb/dmx_types
+media/uapi/dvb/dvb-fe-read-status userspace-api/media/dvb/dvb-fe-read-status
+media/uapi/dvb/dvb-frontend-event userspace-api/media/dvb/dvb-frontend-event
+media/uapi/dvb/dvb-frontend-parameters userspace-api/media/dvb/dvb-frontend-parameters
+media/uapi/dvb/dvbapi userspace-api/media/dvb/dvbapi
+media/uapi/dvb/dvbproperty userspace-api/media/dvb/dvbproperty
+media/uapi/dvb/examples userspace-api/media/dvb/examples
+media/uapi/dvb/fe-bandwidth-t userspace-api/media/dvb/fe-bandwidth-t
+media/uapi/dvb/fe-diseqc-recv-slave-reply userspace-api/media/dvb/fe-diseqc-recv-slave-reply
+media/uapi/dvb/fe-diseqc-reset-overload userspace-api/media/dvb/fe-diseqc-reset-overload
+media/uapi/dvb/fe-diseqc-send-burst userspace-api/media/dvb/fe-diseqc-send-burst
+media/uapi/dvb/fe-diseqc-send-master-cmd userspace-api/media/dvb/fe-diseqc-send-master-cmd
+media/uapi/dvb/fe-dishnetwork-send-legacy-cmd userspace-api/media/dvb/fe-dishnetwork-send-legacy-cmd
+media/uapi/dvb/fe-enable-high-lnb-voltage userspace-api/media/dvb/fe-enable-high-lnb-voltage
+media/uapi/dvb/fe-get-event userspace-api/media/dvb/fe-get-event
+media/uapi/dvb/fe-get-frontend userspace-api/media/dvb/fe-get-frontend
+media/uapi/dvb/fe-get-info userspace-api/media/dvb/fe-get-info
+media/uapi/dvb/fe-get-property userspace-api/media/dvb/fe-get-property
+media/uapi/dvb/fe-read-ber userspace-api/media/dvb/fe-read-ber
+media/uapi/dvb/fe-read-signal-strength userspace-api/media/dvb/fe-read-signal-strength
+media/uapi/dvb/fe-read-snr userspace-api/media/dvb/fe-read-snr
+media/uapi/dvb/fe-read-status userspace-api/media/dvb/fe-read-status
+media/uapi/dvb/fe-read-uncorrected-blocks userspace-api/media/dvb/fe-read-uncorrected-blocks
+media/uapi/dvb/fe-set-frontend userspace-api/media/dvb/fe-set-frontend
+media/uapi/dvb/fe-set-frontend-tune-mode userspace-api/media/dvb/fe-set-frontend-tune-mode
+media/uapi/dvb/fe-set-tone userspace-api/media/dvb/fe-set-tone
+media/uapi/dvb/fe-set-voltage userspace-api/media/dvb/fe-set-voltage
+media/uapi/dvb/fe-type-t userspace-api/media/dvb/fe-type-t
+media/uapi/dvb/fe_property_parameters userspace-api/media/dvb/fe_property_parameters
+media/uapi/dvb/frontend userspace-api/media/dvb/frontend
+media/uapi/dvb/frontend-header userspace-api/media/dvb/frontend-header
+media/uapi/dvb/frontend-property-cable-systems userspace-api/media/dvb/frontend-property-cable-systems
+media/uapi/dvb/frontend-property-satellite-systems userspace-api/media/dvb/frontend-property-satellite-systems
+media/uapi/dvb/frontend-property-terrestrial-systems userspace-api/media/dvb/frontend-property-terrestrial-systems
+media/uapi/dvb/frontend-stat-properties userspace-api/media/dvb/frontend-stat-properties
+media/uapi/dvb/frontend_f_close userspace-api/media/dvb/frontend_f_close
+media/uapi/dvb/frontend_f_open userspace-api/media/dvb/frontend_f_open
+media/uapi/dvb/frontend_fcalls userspace-api/media/dvb/frontend_fcalls
+media/uapi/dvb/frontend_legacy_api userspace-api/media/dvb/frontend_legacy_api
+media/uapi/dvb/frontend_legacy_dvbv3_api userspace-api/media/dvb/frontend_legacy_dvbv3_api
+media/uapi/dvb/headers userspace-api/media/dvb/headers
+media/uapi/dvb/intro userspace-api/media/dvb/intro
+media/uapi/dvb/legacy_dvb_apis userspace-api/media/dvb/legacy_dvb_apis
+media/uapi/dvb/net userspace-api/media/dvb/net
+media/uapi/dvb/net-add-if userspace-api/media/dvb/net-add-if
+media/uapi/dvb/net-get-if userspace-api/media/dvb/net-get-if
+media/uapi/dvb/net-remove-if userspace-api/media/dvb/net-remove-if
+media/uapi/dvb/net-types userspace-api/media/dvb/net-types
+media/uapi/dvb/query-dvb-frontend-info userspace-api/media/dvb/query-dvb-frontend-info
+media/uapi/fdl-appendix userspace-api/media/fdl-appendix
+media/uapi/gen-errors userspace-api/media/gen-errors
+media/uapi/mediactl/media-controller userspace-api/media/mediactl/media-controller
+media/uapi/mediactl/media-controller-intro userspace-api/media/mediactl/media-controller-intro
+media/uapi/mediactl/media-controller-model userspace-api/media/mediactl/media-controller-model
+media/uapi/mediactl/media-func-close userspace-api/media/mediactl/media-func-close
+media/uapi/mediactl/media-func-ioctl userspace-api/media/mediactl/media-func-ioctl
+media/uapi/mediactl/media-func-open userspace-api/media/mediactl/media-func-open
+media/uapi/mediactl/media-funcs userspace-api/media/mediactl/media-funcs
+media/uapi/mediactl/media-header userspace-api/media/mediactl/media-header
+media/uapi/mediactl/media-ioc-device-info userspace-api/media/mediactl/media-ioc-device-info
+media/uapi/mediactl/media-ioc-enum-entities userspace-api/media/mediactl/media-ioc-enum-entities
+media/uapi/mediactl/media-ioc-enum-links userspace-api/media/mediactl/media-ioc-enum-links
+media/uapi/mediactl/media-ioc-g-topology userspace-api/media/mediactl/media-ioc-g-topology
+media/uapi/mediactl/media-ioc-request-alloc userspace-api/media/mediactl/media-ioc-request-alloc
+media/uapi/mediactl/media-ioc-setup-link userspace-api/media/mediactl/media-ioc-setup-link
+media/uapi/mediactl/media-request-ioc-queue userspace-api/media/mediactl/media-request-ioc-queue
+media/uapi/mediactl/media-request-ioc-reinit userspace-api/media/mediactl/media-request-ioc-reinit
+media/uapi/mediactl/media-types userspace-api/media/mediactl/media-types
+media/uapi/mediactl/request-api userspace-api/media/mediactl/request-api
+media/uapi/mediactl/request-func-close userspace-api/media/mediactl/request-func-close
+media/uapi/mediactl/request-func-ioctl userspace-api/media/mediactl/request-func-ioctl
+media/uapi/mediactl/request-func-poll userspace-api/media/mediactl/request-func-poll
+media/uapi/rc/keytable.c userspace-api/media/rc/keytable.c
+media/uapi/rc/lirc-dev userspace-api/media/rc/lirc-dev
+media/uapi/rc/lirc-dev-intro userspace-api/media/rc/lirc-dev-intro
+media/uapi/rc/lirc-func userspace-api/media/rc/lirc-func
+media/uapi/rc/lirc-get-features userspace-api/media/rc/lirc-get-features
+media/uapi/rc/lirc-get-rec-mode userspace-api/media/rc/lirc-get-rec-mode
+media/uapi/rc/lirc-get-rec-resolution userspace-api/media/rc/lirc-get-rec-resolution
+media/uapi/rc/lirc-get-send-mode userspace-api/media/rc/lirc-get-send-mode
+media/uapi/rc/lirc-get-timeout userspace-api/media/rc/lirc-get-timeout
+media/uapi/rc/lirc-header userspace-api/media/rc/lirc-header
+media/uapi/rc/lirc-read userspace-api/media/rc/lirc-read
+media/uapi/rc/lirc-set-measure-carrier-mode userspace-api/media/rc/lirc-set-measure-carrier-mode
+media/uapi/rc/lirc-set-rec-carrier userspace-api/media/rc/lirc-set-rec-carrier
+media/uapi/rc/lirc-set-rec-carrier-range userspace-api/media/rc/lirc-set-rec-carrier-range
+media/uapi/rc/lirc-set-rec-timeout userspace-api/media/rc/lirc-set-rec-timeout
+media/uapi/rc/lirc-set-send-carrier userspace-api/media/rc/lirc-set-send-carrier
+media/uapi/rc/lirc-set-send-duty-cycle userspace-api/media/rc/lirc-set-send-duty-cycle
+media/uapi/rc/lirc-set-transmitter-mask userspace-api/media/rc/lirc-set-transmitter-mask
+media/uapi/rc/lirc-set-wideband-receiver userspace-api/media/rc/lirc-set-wideband-receiver
+media/uapi/rc/lirc-write userspace-api/media/rc/lirc-write
+media/uapi/rc/rc-intro userspace-api/media/rc/rc-intro
+media/uapi/rc/rc-protos userspace-api/media/rc/rc-protos
+media/uapi/rc/rc-sysfs-nodes userspace-api/media/rc/rc-sysfs-nodes
+media/uapi/rc/rc-table-change userspace-api/media/rc/rc-table-change
+media/uapi/rc/rc-tables userspace-api/media/rc/rc-tables
+media/uapi/rc/remote_controllers userspace-api/media/rc/remote_controllers
+media/uapi/v4l/app-pri userspace-api/media/v4l/app-pri
+media/uapi/v4l/audio userspace-api/media/v4l/audio
+media/uapi/v4l/biblio userspace-api/media/v4l/biblio
+media/uapi/v4l/buffer userspace-api/media/v4l/buffer
+media/uapi/v4l/capture-example userspace-api/media/v4l/capture-example
+media/uapi/v4l/capture.c userspace-api/media/v4l/capture.c
+media/uapi/v4l/colorspaces userspace-api/media/v4l/colorspaces
+media/uapi/v4l/colorspaces-defs userspace-api/media/v4l/colorspaces-defs
+media/uapi/v4l/colorspaces-details userspace-api/media/v4l/colorspaces-details
+media/uapi/v4l/common userspace-api/media/v4l/common
+media/uapi/v4l/common-defs userspace-api/media/v4l/common-defs
+media/uapi/v4l/compat userspace-api/media/v4l/compat
+media/uapi/v4l/control userspace-api/media/v4l/control
+media/uapi/v4l/crop userspace-api/media/v4l/crop
+media/uapi/v4l/depth-formats userspace-api/media/v4l/depth-formats
+media/uapi/v4l/dev-capture userspace-api/media/v4l/dev-capture
+media/uapi/v4l/dev-codec userspace-api/media/v4l/dev-mem2mem
+media/uapi/v4l/dev-decoder userspace-api/media/v4l/dev-decoder
+media/uapi/v4l/dev-event userspace-api/media/v4l/dev-event
+media/uapi/v4l/dev-mem2mem userspace-api/media/v4l/dev-mem2mem
+media/uapi/v4l/dev-meta userspace-api/media/v4l/dev-meta
+media/uapi/v4l/dev-osd userspace-api/media/v4l/dev-osd
+media/uapi/v4l/dev-output userspace-api/media/v4l/dev-output
+media/uapi/v4l/dev-overlay userspace-api/media/v4l/dev-overlay
+media/uapi/v4l/dev-radio userspace-api/media/v4l/dev-radio
+media/uapi/v4l/dev-raw-vbi userspace-api/media/v4l/dev-raw-vbi
+media/uapi/v4l/dev-rds userspace-api/media/v4l/dev-rds
+media/uapi/v4l/dev-sdr userspace-api/media/v4l/dev-sdr
+media/uapi/v4l/dev-sliced-vbi userspace-api/media/v4l/dev-sliced-vbi
+media/uapi/v4l/dev-stateless-decoder userspace-api/media/v4l/dev-stateless-decoder
+media/uapi/v4l/dev-subdev userspace-api/media/v4l/dev-subdev
+media/uapi/v4l/dev-touch userspace-api/media/v4l/dev-touch
+media/uapi/v4l/devices userspace-api/media/v4l/devices
+media/uapi/v4l/diff-v4l userspace-api/media/v4l/diff-v4l
+media/uapi/v4l/dmabuf userspace-api/media/v4l/dmabuf
+media/uapi/v4l/dv-timings userspace-api/media/v4l/dv-timings
+media/uapi/v4l/ext-ctrls-camera userspace-api/media/v4l/ext-ctrls-camera
+media/uapi/v4l/ext-ctrls-codec userspace-api/media/v4l/ext-ctrls-codec
+media/uapi/v4l/ext-ctrls-detect userspace-api/media/v4l/ext-ctrls-detect
+media/uapi/v4l/ext-ctrls-dv userspace-api/media/v4l/ext-ctrls-dv
+media/uapi/v4l/ext-ctrls-flash userspace-api/media/v4l/ext-ctrls-flash
+media/uapi/v4l/ext-ctrls-fm-rx userspace-api/media/v4l/ext-ctrls-fm-rx
+media/uapi/v4l/ext-ctrls-fm-tx userspace-api/media/v4l/ext-ctrls-fm-tx
+media/uapi/v4l/ext-ctrls-image-process userspace-api/media/v4l/ext-ctrls-image-process
+media/uapi/v4l/ext-ctrls-image-source userspace-api/media/v4l/ext-ctrls-image-source
+media/uapi/v4l/ext-ctrls-jpeg userspace-api/media/v4l/ext-ctrls-jpeg
+media/uapi/v4l/ext-ctrls-rf-tuner userspace-api/media/v4l/ext-ctrls-rf-tuner
+media/uapi/v4l/extended-controls userspace-api/media/v4l/extended-controls
+media/uapi/v4l/field-order userspace-api/media/v4l/field-order
+media/uapi/v4l/format userspace-api/media/v4l/format
+media/uapi/v4l/func-close userspace-api/media/v4l/func-close
+media/uapi/v4l/func-ioctl userspace-api/media/v4l/func-ioctl
+media/uapi/v4l/func-mmap userspace-api/media/v4l/func-mmap
+media/uapi/v4l/func-munmap userspace-api/media/v4l/func-munmap
+media/uapi/v4l/func-open userspace-api/media/v4l/func-open
+media/uapi/v4l/func-poll userspace-api/media/v4l/func-poll
+media/uapi/v4l/func-read userspace-api/media/v4l/func-read
+media/uapi/v4l/func-select userspace-api/media/v4l/func-select
+media/uapi/v4l/func-write userspace-api/media/v4l/func-write
+media/uapi/v4l/hist-v4l2 userspace-api/media/v4l/hist-v4l2
+media/uapi/v4l/hsv-formats userspace-api/media/v4l/hsv-formats
+media/uapi/v4l/io userspace-api/media/v4l/io
+media/uapi/v4l/libv4l userspace-api/media/v4l/libv4l
+media/uapi/v4l/libv4l-introduction userspace-api/media/v4l/libv4l-introduction
+media/uapi/v4l/meta-formats userspace-api/media/v4l/meta-formats
+media/uapi/v4l/mmap userspace-api/media/v4l/mmap
+media/uapi/v4l/open userspace-api/media/v4l/open
+media/uapi/v4l/pixfmt userspace-api/media/v4l/pixfmt
+media/uapi/v4l/pixfmt-002 userspace-api/media/v4l/pixfmt-v4l2
+media/uapi/v4l/pixfmt-003 userspace-api/media/v4l/pixfmt-v4l2-mplane
+media/uapi/v4l/pixfmt-004 userspace-api/media/v4l/pixfmt-intro
+media/uapi/v4l/pixfmt-006 userspace-api/media/v4l/colorspaces-defs
+media/uapi/v4l/pixfmt-007 userspace-api/media/v4l/colorspaces-details
+media/uapi/v4l/pixfmt-013 userspace-api/media/v4l/pixfmt-compressed
+media/uapi/v4l/pixfmt-bayer userspace-api/media/v4l/pixfmt-bayer
+media/uapi/v4l/pixfmt-cnf4 userspace-api/media/v4l/pixfmt-cnf4
+media/uapi/v4l/pixfmt-compressed userspace-api/media/v4l/pixfmt-compressed
+media/uapi/v4l/pixfmt-indexed userspace-api/media/v4l/pixfmt-indexed
+media/uapi/v4l/pixfmt-intro userspace-api/media/v4l/pixfmt-intro
+media/uapi/v4l/pixfmt-inzi userspace-api/media/v4l/pixfmt-inzi
+media/uapi/v4l/pixfmt-m420 userspace-api/media/v4l/pixfmt-m420
+media/uapi/v4l/pixfmt-meta-d4xx userspace-api/media/v4l/metafmt-d4xx
+media/uapi/v4l/pixfmt-meta-intel-ipu3 userspace-api/media/v4l/metafmt-intel-ipu3
+media/uapi/v4l/pixfmt-meta-uvc userspace-api/media/v4l/metafmt-uvc
+media/uapi/v4l/pixfmt-meta-vivid userspace-api/media/v4l/metafmt-vivid
+media/uapi/v4l/pixfmt-meta-vsp1-hgo userspace-api/media/v4l/metafmt-vsp1-hgo
+media/uapi/v4l/pixfmt-meta-vsp1-hgt userspace-api/media/v4l/metafmt-vsp1-hgt
+media/uapi/v4l/pixfmt-packed-hsv userspace-api/media/v4l/pixfmt-packed-hsv
+media/uapi/v4l/pixfmt-packed-yuv userspace-api/media/v4l/pixfmt-packed-yuv
+media/uapi/v4l/pixfmt-reserved userspace-api/media/v4l/pixfmt-reserved
+media/uapi/v4l/pixfmt-rgb userspace-api/media/v4l/pixfmt-rgb
+media/uapi/v4l/pixfmt-sbggr16 userspace-api/media/v4l/pixfmt-srggb16
+media/uapi/v4l/pixfmt-sdr-cs08 userspace-api/media/v4l/pixfmt-sdr-cs08
+media/uapi/v4l/pixfmt-sdr-cs14le userspace-api/media/v4l/pixfmt-sdr-cs14le
+media/uapi/v4l/pixfmt-sdr-cu08 userspace-api/media/v4l/pixfmt-sdr-cu08
+media/uapi/v4l/pixfmt-sdr-cu16le userspace-api/media/v4l/pixfmt-sdr-cu16le
+media/uapi/v4l/pixfmt-sdr-pcu16be userspace-api/media/v4l/pixfmt-sdr-pcu16be
+media/uapi/v4l/pixfmt-sdr-pcu18be userspace-api/media/v4l/pixfmt-sdr-pcu18be
+media/uapi/v4l/pixfmt-sdr-pcu20be userspace-api/media/v4l/pixfmt-sdr-pcu20be
+media/uapi/v4l/pixfmt-sdr-ru12le userspace-api/media/v4l/pixfmt-sdr-ru12le
+media/uapi/v4l/pixfmt-srggb10 userspace-api/media/v4l/pixfmt-srggb10
+media/uapi/v4l/pixfmt-srggb10-ipu3 userspace-api/media/v4l/pixfmt-srggb10-ipu3
+media/uapi/v4l/pixfmt-srggb10alaw8 userspace-api/media/v4l/pixfmt-srggb10alaw8
+media/uapi/v4l/pixfmt-srggb10dpcm8 userspace-api/media/v4l/pixfmt-srggb10dpcm8
+media/uapi/v4l/pixfmt-srggb10p userspace-api/media/v4l/pixfmt-srggb10p
+media/uapi/v4l/pixfmt-srggb12 userspace-api/media/v4l/pixfmt-srggb12
+media/uapi/v4l/pixfmt-srggb12p userspace-api/media/v4l/pixfmt-srggb12p
+media/uapi/v4l/pixfmt-srggb14 userspace-api/media/v4l/pixfmt-srggb14
+media/uapi/v4l/pixfmt-srggb14p userspace-api/media/v4l/pixfmt-srggb14p
+media/uapi/v4l/pixfmt-srggb16 userspace-api/media/v4l/pixfmt-srggb16
+media/uapi/v4l/pixfmt-srggb8 userspace-api/media/v4l/pixfmt-srggb8
+media/uapi/v4l/pixfmt-tch-td08 userspace-api/media/v4l/pixfmt-tch-td08
+media/uapi/v4l/pixfmt-tch-td16 userspace-api/media/v4l/pixfmt-tch-td16
+media/uapi/v4l/pixfmt-tch-tu08 userspace-api/media/v4l/pixfmt-tch-tu08
+media/uapi/v4l/pixfmt-tch-tu16 userspace-api/media/v4l/pixfmt-tch-tu16
+media/uapi/v4l/pixfmt-uv8 userspace-api/media/v4l/pixfmt-uv8
+media/uapi/v4l/pixfmt-v4l2 userspace-api/media/v4l/pixfmt-v4l2
+media/uapi/v4l/pixfmt-v4l2-mplane userspace-api/media/v4l/pixfmt-v4l2-mplane
+media/uapi/v4l/pixfmt-y12i userspace-api/media/v4l/pixfmt-y12i
+media/uapi/v4l/pixfmt-y8i userspace-api/media/v4l/pixfmt-y8i
+media/uapi/v4l/pixfmt-z16 userspace-api/media/v4l/pixfmt-z16
+media/uapi/v4l/planar-apis userspace-api/media/v4l/planar-apis
+media/uapi/v4l/querycap userspace-api/media/v4l/querycap
+media/uapi/v4l/rw userspace-api/media/v4l/rw
+media/uapi/v4l/sdr-formats userspace-api/media/v4l/sdr-formats
+media/uapi/v4l/selection-api userspace-api/media/v4l/selection-api
+media/uapi/v4l/selection-api-002 userspace-api/media/v4l/selection-api-intro
+media/uapi/v4l/selection-api-003 userspace-api/media/v4l/selection-api-targets
+media/uapi/v4l/selection-api-004 userspace-api/media/v4l/selection-api-configuration
+media/uapi/v4l/selection-api-005 userspace-api/media/v4l/selection-api-vs-crop-api
+media/uapi/v4l/selection-api-006 userspace-api/media/v4l/selection-api-examples
+media/uapi/v4l/selection-api-configuration userspace-api/media/v4l/selection-api-configuration
+media/uapi/v4l/selection-api-examples userspace-api/media/v4l/selection-api-examples
+media/uapi/v4l/selection-api-intro userspace-api/media/v4l/selection-api-intro
+media/uapi/v4l/selection-api-targets userspace-api/media/v4l/selection-api-targets
+media/uapi/v4l/selection-api-vs-crop-api userspace-api/media/v4l/selection-api-vs-crop-api
+media/uapi/v4l/selections-common userspace-api/media/v4l/selections-common
+media/uapi/v4l/standard userspace-api/media/v4l/standard
+media/uapi/v4l/streaming-par userspace-api/media/v4l/streaming-par
+media/uapi/v4l/subdev-formats userspace-api/media/v4l/subdev-formats
+media/uapi/v4l/tch-formats userspace-api/media/v4l/tch-formats
+media/uapi/v4l/tuner userspace-api/media/v4l/tuner
+media/uapi/v4l/user-func userspace-api/media/v4l/user-func
+media/uapi/v4l/userp userspace-api/media/v4l/userp
+media/uapi/v4l/v4l2 userspace-api/media/v4l/v4l2
+media/uapi/v4l/v4l2-selection-flags userspace-api/media/v4l/v4l2-selection-flags
+media/uapi/v4l/v4l2-selection-targets userspace-api/media/v4l/v4l2-selection-targets
+media/uapi/v4l/v4l2grab-example userspace-api/media/v4l/v4l2grab-example
+media/uapi/v4l/v4l2grab.c userspace-api/media/v4l/v4l2grab.c
+media/uapi/v4l/video userspace-api/media/v4l/video
+media/uapi/v4l/videodev userspace-api/media/v4l/videodev
+media/uapi/v4l/vidioc-create-bufs userspace-api/media/v4l/vidioc-create-bufs
+media/uapi/v4l/vidioc-cropcap userspace-api/media/v4l/vidioc-cropcap
+media/uapi/v4l/vidioc-dbg-g-chip-info userspace-api/media/v4l/vidioc-dbg-g-chip-info
+media/uapi/v4l/vidioc-dbg-g-register userspace-api/media/v4l/vidioc-dbg-g-register
+media/uapi/v4l/vidioc-decoder-cmd userspace-api/media/v4l/vidioc-decoder-cmd
+media/uapi/v4l/vidioc-dqevent userspace-api/media/v4l/vidioc-dqevent
+media/uapi/v4l/vidioc-dv-timings-cap userspace-api/media/v4l/vidioc-dv-timings-cap
+media/uapi/v4l/vidioc-encoder-cmd userspace-api/media/v4l/vidioc-encoder-cmd
+media/uapi/v4l/vidioc-enum-dv-timings userspace-api/media/v4l/vidioc-enum-dv-timings
+media/uapi/v4l/vidioc-enum-fmt userspace-api/media/v4l/vidioc-enum-fmt
+media/uapi/v4l/vidioc-enum-frameintervals userspace-api/media/v4l/vidioc-enum-frameintervals
+media/uapi/v4l/vidioc-enum-framesizes userspace-api/media/v4l/vidioc-enum-framesizes
+media/uapi/v4l/vidioc-enum-freq-bands userspace-api/media/v4l/vidioc-enum-freq-bands
+media/uapi/v4l/vidioc-enumaudio userspace-api/media/v4l/vidioc-enumaudio
+media/uapi/v4l/vidioc-enumaudioout userspace-api/media/v4l/vidioc-enumaudioout
+media/uapi/v4l/vidioc-enuminput userspace-api/media/v4l/vidioc-enuminput
+media/uapi/v4l/vidioc-enumoutput userspace-api/media/v4l/vidioc-enumoutput
+media/uapi/v4l/vidioc-enumstd userspace-api/media/v4l/vidioc-enumstd
+media/uapi/v4l/vidioc-expbuf userspace-api/media/v4l/vidioc-expbuf
+media/uapi/v4l/vidioc-g-audio userspace-api/media/v4l/vidioc-g-audio
+media/uapi/v4l/vidioc-g-audioout userspace-api/media/v4l/vidioc-g-audioout
+media/uapi/v4l/vidioc-g-crop userspace-api/media/v4l/vidioc-g-crop
+media/uapi/v4l/vidioc-g-ctrl userspace-api/media/v4l/vidioc-g-ctrl
+media/uapi/v4l/vidioc-g-dv-timings userspace-api/media/v4l/vidioc-g-dv-timings
+media/uapi/v4l/vidioc-g-edid userspace-api/media/v4l/vidioc-g-edid
+media/uapi/v4l/vidioc-g-enc-index userspace-api/media/v4l/vidioc-g-enc-index
+media/uapi/v4l/vidioc-g-ext-ctrls userspace-api/media/v4l/vidioc-g-ext-ctrls
+media/uapi/v4l/vidioc-g-fbuf userspace-api/media/v4l/vidioc-g-fbuf
+media/uapi/v4l/vidioc-g-fmt userspace-api/media/v4l/vidioc-g-fmt
+media/uapi/v4l/vidioc-g-frequency userspace-api/media/v4l/vidioc-g-frequency
+media/uapi/v4l/vidioc-g-input userspace-api/media/v4l/vidioc-g-input
+media/uapi/v4l/vidioc-g-jpegcomp userspace-api/media/v4l/vidioc-g-jpegcomp
+media/uapi/v4l/vidioc-g-modulator userspace-api/media/v4l/vidioc-g-modulator
+media/uapi/v4l/vidioc-g-output userspace-api/media/v4l/vidioc-g-output
+media/uapi/v4l/vidioc-g-parm userspace-api/media/v4l/vidioc-g-parm
+media/uapi/v4l/vidioc-g-priority userspace-api/media/v4l/vidioc-g-priority
+media/uapi/v4l/vidioc-g-selection userspace-api/media/v4l/vidioc-g-selection
+media/uapi/v4l/vidioc-g-sliced-vbi-cap userspace-api/media/v4l/vidioc-g-sliced-vbi-cap
+media/uapi/v4l/vidioc-g-std userspace-api/media/v4l/vidioc-g-std
+media/uapi/v4l/vidioc-g-tuner userspace-api/media/v4l/vidioc-g-tuner
+media/uapi/v4l/vidioc-log-status userspace-api/media/v4l/vidioc-log-status
+media/uapi/v4l/vidioc-overlay userspace-api/media/v4l/vidioc-overlay
+media/uapi/v4l/vidioc-prepare-buf userspace-api/media/v4l/vidioc-prepare-buf
+media/uapi/v4l/vidioc-qbuf userspace-api/media/v4l/vidioc-qbuf
+media/uapi/v4l/vidioc-query-dv-timings userspace-api/media/v4l/vidioc-query-dv-timings
+media/uapi/v4l/vidioc-querybuf userspace-api/media/v4l/vidioc-querybuf
+media/uapi/v4l/vidioc-querycap userspace-api/media/v4l/vidioc-querycap
+media/uapi/v4l/vidioc-queryctrl userspace-api/media/v4l/vidioc-queryctrl
+media/uapi/v4l/vidioc-querystd userspace-api/media/v4l/vidioc-querystd
+media/uapi/v4l/vidioc-reqbufs userspace-api/media/v4l/vidioc-reqbufs
+media/uapi/v4l/vidioc-s-hw-freq-seek userspace-api/media/v4l/vidioc-s-hw-freq-seek
+media/uapi/v4l/vidioc-streamon userspace-api/media/v4l/vidioc-streamon
+media/uapi/v4l/vidioc-subdev-enum-frame-interval userspace-api/media/v4l/vidioc-subdev-enum-frame-interval
+media/uapi/v4l/vidioc-subdev-enum-frame-size userspace-api/media/v4l/vidioc-subdev-enum-frame-size
+media/uapi/v4l/vidioc-subdev-enum-mbus-code userspace-api/media/v4l/vidioc-subdev-enum-mbus-code
+media/uapi/v4l/vidioc-subdev-g-crop userspace-api/media/v4l/vidioc-subdev-g-crop
+media/uapi/v4l/vidioc-subdev-g-fmt userspace-api/media/v4l/vidioc-subdev-g-fmt
+media/uapi/v4l/vidioc-subdev-g-frame-interval userspace-api/media/v4l/vidioc-subdev-g-frame-interval
+media/uapi/v4l/vidioc-subdev-g-selection userspace-api/media/v4l/vidioc-subdev-g-selection
+media/uapi/v4l/vidioc-subscribe-event userspace-api/media/v4l/vidioc-subscribe-event
+media/uapi/v4l/yuv-formats userspace-api/media/v4l/yuv-formats
+media/v4l-drivers/au0828-cardlist admin-guide/media/au0828-cardlist
+media/v4l-drivers/bttv admin-guide/media/bttv
+media/v4l-drivers/bttv-cardlist admin-guide/media/bttv-cardlist
+media/v4l-drivers/bttv-devel driver-api/media/drivers/bttv-devel
+media/v4l-drivers/cafe_ccic admin-guide/media/cafe_ccic
+media/v4l-drivers/cardlist admin-guide/media/cardlist
+media/v4l-drivers/cx2341x driver-api/media/drivers/cx2341x-devel
+media/v4l-drivers/cx2341x-devel driver-api/media/drivers/cx2341x-devel
+media/v4l-drivers/cx2341x-uapi userspace-api/media/drivers/cx2341x-uapi
+media/v4l-drivers/cx23885-cardlist admin-guide/media/cx23885-cardlist
+media/v4l-drivers/cx88 admin-guide/media/cx88
+media/v4l-drivers/cx88-cardlist admin-guide/media/cx88-cardlist
+media/v4l-drivers/cx88-devel driver-api/media/drivers/cx88-devel
+media/v4l-drivers/em28xx-cardlist admin-guide/media/em28xx-cardlist
+media/v4l-drivers/fimc admin-guide/media/fimc
+media/v4l-drivers/fimc-devel driver-api/media/drivers/fimc-devel
+media/v4l-drivers/fourcc userspace-api/media/v4l/fourcc
+media/v4l-drivers/gspca-cardlist admin-guide/media/gspca-cardlist
+media/v4l-drivers/imx admin-guide/media/imx
+media/v4l-drivers/imx-uapi userspace-api/media/drivers/imx-uapi
+media/v4l-drivers/imx7 admin-guide/media/imx7
+media/v4l-drivers/index userspace-api/media/drivers/index
+media/v4l-drivers/ipu3 admin-guide/media/ipu3
+media/v4l-drivers/ivtv admin-guide/media/ivtv
+media/v4l-drivers/ivtv-cardlist admin-guide/media/ivtv-cardlist
+media/v4l-drivers/max2175 userspace-api/media/drivers/max2175
+media/v4l-drivers/omap3isp admin-guide/media/omap3isp
+media/v4l-drivers/omap3isp-uapi userspace-api/media/drivers/omap3isp-uapi
+media/v4l-drivers/philips admin-guide/media/philips
+media/v4l-drivers/pvrusb2 driver-api/media/drivers/pvrusb2
+media/v4l-drivers/pxa_camera driver-api/media/drivers/pxa_camera
+media/v4l-drivers/qcom_camss admin-guide/media/qcom_camss
+media/v4l-drivers/radiotrack driver-api/media/drivers/radiotrack
+media/v4l-drivers/rcar-fdp1 admin-guide/media/rcar-fdp1
+media/v4l-drivers/saa7134 admin-guide/media/saa7134
+media/v4l-drivers/saa7134-cardlist admin-guide/media/saa7134-cardlist
+media/v4l-drivers/saa7134-devel driver-api/media/drivers/saa7134-devel
+media/v4l-drivers/saa7164-cardlist admin-guide/media/saa7164-cardlist
+media/v4l-drivers/sh_mobile_ceu_camera driver-api/media/drivers/sh_mobile_ceu_camera
+media/v4l-drivers/si470x admin-guide/media/si470x
+media/v4l-drivers/si4713 admin-guide/media/si4713
+media/v4l-drivers/si476x admin-guide/media/si476x
+media/v4l-drivers/tuner-cardlist admin-guide/media/tuner-cardlist
+media/v4l-drivers/tuners driver-api/media/drivers/tuners
+media/v4l-drivers/uvcvideo userspace-api/media/drivers/uvcvideo
+media/v4l-drivers/v4l-with-ir admin-guide/media/remote-controller
+media/v4l-drivers/vimc admin-guide/media/vimc
+media/v4l-drivers/vimc-devel driver-api/media/drivers/vimc-devel
+media/v4l-drivers/vivid admin-guide/media/vivid
+media/v4l-drivers/zoran driver-api/media/drivers/zoran
+memory-devices/ti-emif driver-api/memory-devices/ti-emif
+mips/booting arch/mips/booting
+mips/features arch/mips/features
+mips/index arch/mips/index
+mips/ingenic-tcu arch/mips/ingenic-tcu
+mm/slub admin-guide/mm/slab
+mmc/index driver-api/mmc/index
+mmc/mmc-async-req driver-api/mmc/mmc-async-req
+mmc/mmc-dev-attrs driver-api/mmc/mmc-dev-attrs
+mmc/mmc-dev-parts driver-api/mmc/mmc-dev-parts
+mmc/mmc-tools driver-api/mmc/mmc-tools
+mtd/index driver-api/mtd/index
+mtd/intel-spi driver-api/mtd/spi-intel
+mtd/nand_ecc driver-api/mtd/nand_ecc
+mtd/spi-nor driver-api/mtd/spi-nor
+namespaces/compatibility-list admin-guide/namespaces/compatibility-list
+namespaces/index admin-guide/namespaces/index
+namespaces/resource-control admin-guide/namespaces/resource-control
+networking/altera_tse networking/device_drivers/ethernet/altera/altera_tse
+networking/baycom networking/device_drivers/hamradio/baycom
+networking/bpf_flow_dissector bpf/prog_flow_dissector
+networking/cxacru networking/device_drivers/atm/cxacru
+networking/defza networking/device_drivers/fddi/defza
+networking/device_drivers/3com/3c509 networking/device_drivers/ethernet/3com/3c509
+networking/device_drivers/3com/vortex networking/device_drivers/ethernet/3com/vortex
+networking/device_drivers/amazon/ena networking/device_drivers/ethernet/amazon/ena
+networking/device_drivers/aquantia/atlantic networking/device_drivers/ethernet/aquantia/atlantic
+networking/device_drivers/chelsio/cxgb networking/device_drivers/ethernet/chelsio/cxgb
+networking/device_drivers/cirrus/cs89x0 networking/device_drivers/ethernet/cirrus/cs89x0
+networking/device_drivers/davicom/dm9000 networking/device_drivers/ethernet/davicom/dm9000
+networking/device_drivers/dec/dmfe networking/device_drivers/ethernet/dec/dmfe
+networking/device_drivers/dlink/dl2k networking/device_drivers/ethernet/dlink/dl2k
+networking/device_drivers/freescale/dpaa networking/device_drivers/ethernet/freescale/dpaa
+networking/device_drivers/freescale/dpaa2/dpio-driver networking/device_drivers/ethernet/freescale/dpaa2/dpio-driver
+networking/device_drivers/freescale/dpaa2/ethernet-driver networking/device_drivers/ethernet/freescale/dpaa2/ethernet-driver
+networking/device_drivers/freescale/dpaa2/index networking/device_drivers/ethernet/freescale/dpaa2/index
+networking/device_drivers/freescale/dpaa2/mac-phy-support networking/device_drivers/ethernet/freescale/dpaa2/mac-phy-support
+networking/device_drivers/freescale/dpaa2/overview networking/device_drivers/ethernet/freescale/dpaa2/overview
+networking/device_drivers/freescale/gianfar networking/device_drivers/ethernet/freescale/gianfar
+networking/device_drivers/google/gve networking/device_drivers/ethernet/google/gve
+networking/device_drivers/intel/e100 networking/device_drivers/ethernet/intel/e100
+networking/device_drivers/intel/e1000 networking/device_drivers/ethernet/intel/e1000
+networking/device_drivers/intel/e1000e networking/device_drivers/ethernet/intel/e1000e
+networking/device_drivers/intel/fm10k networking/device_drivers/ethernet/intel/fm10k
+networking/device_drivers/intel/i40e networking/device_drivers/ethernet/intel/i40e
+networking/device_drivers/intel/iavf networking/device_drivers/ethernet/intel/iavf
+networking/device_drivers/intel/ice networking/device_drivers/ethernet/intel/ice
+networking/device_drivers/intel/igb networking/device_drivers/ethernet/intel/igb
+networking/device_drivers/intel/igbvf networking/device_drivers/ethernet/intel/igbvf
+networking/device_drivers/intel/ipw2100 networking/device_drivers/wifi/intel/ipw2100
+networking/device_drivers/intel/ipw2200 networking/device_drivers/wifi/intel/ipw2200
+networking/device_drivers/intel/ixgbe networking/device_drivers/ethernet/intel/ixgbe
+networking/device_drivers/intel/ixgbevf networking/device_drivers/ethernet/intel/ixgbevf
+networking/device_drivers/marvell/octeontx2 networking/device_drivers/ethernet/marvell/octeontx2
+networking/device_drivers/microsoft/netvsc networking/device_drivers/ethernet/microsoft/netvsc
+networking/device_drivers/neterion/s2io networking/device_drivers/ethernet/neterion/s2io
+networking/device_drivers/netronome/nfp networking/device_drivers/ethernet/netronome/nfp
+networking/device_drivers/pensando/ionic networking/device_drivers/ethernet/pensando/ionic
+networking/device_drivers/qualcomm/rmnet networking/device_drivers/cellular/qualcomm/rmnet
+networking/device_drivers/smsc/smc9 networking/device_drivers/ethernet/smsc/smc9
+networking/device_drivers/stmicro/stmmac networking/device_drivers/ethernet/stmicro/stmmac
+networking/device_drivers/ti/cpsw networking/device_drivers/ethernet/ti/cpsw
+networking/device_drivers/ti/cpsw_switchdev networking/device_drivers/ethernet/ti/cpsw_switchdev
+networking/device_drivers/ti/tlan networking/device_drivers/ethernet/ti/tlan
+networking/devlink-trap networking/devlink/devlink-trap
+networking/dpaa2/dpio-driver networking/device_drivers/ethernet/freescale/dpaa2/dpio-driver
+networking/dpaa2/ethernet-driver networking/device_drivers/ethernet/freescale/dpaa2/ethernet-driver
+networking/dpaa2/index networking/device_drivers/ethernet/freescale/dpaa2/index
+networking/dpaa2/overview networking/device_drivers/ethernet/freescale/dpaa2/overview
+networking/e100 networking/device_drivers/ethernet/intel/e100
+networking/e1000 networking/device_drivers/ethernet/intel/e1000
+networking/e1000e networking/device_drivers/ethernet/intel/e1000e
+networking/fm10k networking/device_drivers/ethernet/intel/fm10k
+networking/fore200e networking/device_drivers/atm/fore200e
+networking/hinic networking/device_drivers/ethernet/huawei/hinic
+networking/i40e networking/device_drivers/ethernet/intel/i40e
+networking/iavf networking/device_drivers/ethernet/intel/iavf
+networking/ice networking/device_drivers/ethernet/intel/ice
+networking/igb networking/device_drivers/ethernet/intel/igb
+networking/igbvf networking/device_drivers/ethernet/intel/igbvf
+networking/iphase networking/device_drivers/atm/iphase
+networking/ixgbe networking/device_drivers/ethernet/intel/ixgbe
+networking/ixgbevf networking/device_drivers/ethernet/intel/ixgbevf
+networking/netdev-FAQ process/maintainer-netdev
+networking/skfp networking/device_drivers/fddi/skfp
+networking/z8530drv networking/device_drivers/hamradio/z8530drv
+nfc/index driver-api/nfc/index
+nfc/nfc-hci driver-api/nfc/nfc-hci
+nfc/nfc-pn544 driver-api/nfc/nfc-pn544
+nios2/features arch/nios2/features
+nios2/index arch/nios2/index
+nios2/nios2 arch/nios2/nios2
+nvdimm/btt driver-api/nvdimm/btt
+nvdimm/index driver-api/nvdimm/index
+nvdimm/nvdimm driver-api/nvdimm/nvdimm
+nvdimm/security driver-api/nvdimm/security
+nvmem/nvmem driver-api/nvmem
+openrisc/features arch/openrisc/features
+openrisc/index arch/openrisc/index
+openrisc/openrisc_port arch/openrisc/openrisc_port
+openrisc/todo arch/openrisc/todo
+parisc/debugging arch/parisc/debugging
+parisc/features arch/parisc/features
+parisc/index arch/parisc/index
+parisc/registers arch/parisc/registers
+perf/arm-ccn admin-guide/perf/arm-ccn
+perf/arm_dsu_pmu admin-guide/perf/arm_dsu_pmu
+perf/hisi-pmu admin-guide/perf/hisi-pmu
+perf/index admin-guide/perf/index
+perf/qcom_l2_pmu admin-guide/perf/qcom_l2_pmu
+perf/qcom_l3_pmu admin-guide/perf/qcom_l3_pmu
+perf/thunderx2-pmu admin-guide/perf/thunderx2-pmu
+perf/xgene-pmu admin-guide/perf/xgene-pmu
+phy/samsung-usb2 driver-api/phy/samsung-usb2
+powerpc/associativity arch/powerpc/associativity
+powerpc/booting arch/powerpc/booting
+powerpc/bootwrapper arch/powerpc/bootwrapper
+powerpc/cpu_families arch/powerpc/cpu_families
+powerpc/cpu_features arch/powerpc/cpu_features
+powerpc/dawr-power9 arch/powerpc/dawr-power9
+powerpc/dexcr arch/powerpc/dexcr
+powerpc/dscr arch/powerpc/dscr
+powerpc/eeh-pci-error-recovery arch/powerpc/eeh-pci-error-recovery
+powerpc/elf_hwcaps arch/powerpc/elf_hwcaps
+powerpc/elfnote arch/powerpc/elfnote
+powerpc/features arch/powerpc/features
+powerpc/firmware-assisted-dump arch/powerpc/firmware-assisted-dump
+powerpc/hvcs arch/powerpc/hvcs
+powerpc/imc arch/powerpc/imc
+powerpc/index arch/powerpc/index
+powerpc/isa-versions arch/powerpc/isa-versions
+powerpc/kaslr-booke32 arch/powerpc/kaslr-booke32
+powerpc/mpc52xx arch/powerpc/mpc52xx
+powerpc/papr_hcalls arch/powerpc/papr_hcalls
+powerpc/pci_iov_resource_on_powernv arch/powerpc/pci_iov_resource_on_powernv
+powerpc/pmu-ebb arch/powerpc/pmu-ebb
+powerpc/ptrace arch/powerpc/ptrace
+powerpc/qe_firmware arch/powerpc/qe_firmware
+powerpc/syscall64-abi arch/powerpc/syscall64-abi
+powerpc/transactional_memory arch/powerpc/transactional_memory
+powerpc/ultravisor arch/powerpc/ultravisor
+powerpc/vas-api arch/powerpc/vas-api
+powerpc/vcpudispatch_stats arch/powerpc/vcpudispatch_stats
+powerpc/vmemmap_dedup arch/powerpc/vmemmap_dedup
+process/clang-format dev-tools/clang-format
+process/magic-number staging/magic-number
+process/unaligned-memory-access core-api/unaligned-memory-access
+rapidio/index driver-api/rapidio/index
+rapidio/mport_cdev driver-api/rapidio/mport_cdev
+rapidio/rapidio driver-api/rapidio/rapidio
+rapidio/rio_cm driver-api/rapidio/rio_cm
+rapidio/sysfs driver-api/rapidio/sysfs
+rapidio/tsi721 driver-api/rapidio/tsi721
+riscv/acpi arch/riscv/acpi
+riscv/boot arch/riscv/boot
+riscv/boot-image-header arch/riscv/boot-image-header
+riscv/features arch/riscv/features
+riscv/hwprobe arch/riscv/hwprobe
+riscv/index arch/riscv/index
+riscv/patch-acceptance arch/riscv/patch-acceptance
+riscv/uabi arch/riscv/uabi
+riscv/vector arch/riscv/vector
+riscv/vm-layout arch/riscv/vm-layout
+s390/3270 arch/s390/3270
+s390/cds arch/s390/cds
+s390/common_io arch/s390/common_io
+s390/driver-model arch/s390/driver-model
+s390/features arch/s390/features
+s390/index arch/s390/index
+s390/monreader arch/s390/monreader
+s390/pci arch/s390/pci
+s390/qeth arch/s390/qeth
+s390/s390dbf arch/s390/s390dbf
+s390/text_files arch/s390/text_files
+s390/vfio-ap arch/s390/vfio-ap
+s390/vfio-ap-locking arch/s390/vfio-ap-locking
+s390/vfio-ccw arch/s390/vfio-ccw
+s390/zfcpdump arch/s390/zfcpdump
+security/LSM security/lsm-development
+security/LSM-sctp security/SCTP
+serial/driver driver-api/serial/driver
+serial/index driver-api/serial/index
+serial/moxa-smartio driver-api/tty/moxa-smartio
+serial/n_gsm driver-api/tty/n_gsm
+serial/serial-iso7816 driver-api/serial/serial-iso7816
+serial/serial-rs485 driver-api/serial/serial-rs485
+serial/tty driver-api/tty/tty_ldisc
+sh/booting arch/sh/booting
+sh/features arch/sh/features
+sh/index arch/sh/index
+sh/new-machine arch/sh/new-machine
+sh/register-banks arch/sh/register-banks
+sparc/adi arch/sparc/adi
+sparc/console arch/sparc/console
+sparc/features arch/sparc/features
+sparc/index arch/sparc/index
+sparc/oradax/oracle-dax arch/sparc/oradax/oracle-dax
+staging/kprobes trace/kprobes
+sysctl/abi admin-guide/sysctl/abi
+sysctl/fs admin-guide/sysctl/fs
+sysctl/index admin-guide/sysctl/index
+sysctl/kernel admin-guide/sysctl/kernel
+sysctl/net admin-guide/sysctl/net
+sysctl/sunrpc admin-guide/sysctl/sunrpc
+sysctl/user admin-guide/sysctl/user
+sysctl/vm admin-guide/sysctl/vm
+thermal/cpu-cooling-api driver-api/thermal/cpu-cooling-api
+thermal/exynos_thermal driver-api/thermal/exynos_thermal
+thermal/exynos_thermal_emulation driver-api/thermal/exynos_thermal_emulation
+thermal/index driver-api/thermal/index
+thermal/intel_powerclamp admin-guide/thermal/intel_powerclamp
+thermal/nouveau_thermal driver-api/thermal/nouveau_thermal
+thermal/power_allocator driver-api/thermal/power_allocator
+thermal/sysfs-api driver-api/thermal/sysfs-api
+thermal/x86_pkg_temperature_thermal driver-api/thermal/x86_pkg_temperature_thermal
+tpm/index security/tpm/index
+tpm/tpm_vtpm_proxy security/tpm/tpm_vtpm_proxy
+trace/coresight trace/coresight/coresight
+trace/coresight-cpu-debug trace/coresight/coresight-cpu-debug
+trace/rv/da_monitor_synthesis trace/rv/monitor_synthesis
+translations/it_IT/admin-guide/security-bugs translations/it_IT/process/security-bugs
+translations/it_IT/process/clang-format translations/it_IT/dev-tools/clang-format
+translations/it_IT/process/magic-number translations/it_IT/staging/magic-number
+translations/it_IT/riscv/patch-acceptance translations/it_IT/arch/riscv/patch-acceptance
+translations/ja_JP/howto translations/ja_JP/process/howto
+translations/ko_KR/howto translations/ko_KR/process/howto
+translations/sp_SP/howto translations/sp_SP/process/howto
+translations/sp_SP/submitting-patches translations/sp_SP/process/submitting-patches
+translations/zh_CN/admin-guide/security-bugs translations/zh_CN/process/security-bugs
+translations/zh_CN/arch translations/zh_CN/arch/index
+translations/zh_CN/arm64/amu translations/zh_CN/arch/arm64/amu
+translations/zh_CN/arm64/elf_hwcaps translations/zh_CN/arch/arm64/elf_hwcaps
+translations/zh_CN/arm64/hugetlbpage translations/zh_CN/arch/arm64/hugetlbpage
+translations/zh_CN/arm64/index translations/zh_CN/arch/arm64/index
+translations/zh_CN/arm64/perf translations/zh_CN/arch/arm64/perf
+translations/zh_CN/coding-style translations/zh_CN/process/coding-style
+translations/zh_CN/loongarch/booting translations/zh_CN/arch/loongarch/booting
+translations/zh_CN/loongarch/features translations/zh_CN/arch/loongarch/features
+translations/zh_CN/loongarch/index translations/zh_CN/arch/loongarch/index
+translations/zh_CN/loongarch/introduction translations/zh_CN/arch/loongarch/introduction
+translations/zh_CN/loongarch/irq-chip-model translations/zh_CN/arch/loongarch/irq-chip-model
+translations/zh_CN/mips/booting translations/zh_CN/arch/mips/booting
+translations/zh_CN/mips/features translations/zh_CN/arch/mips/features
+translations/zh_CN/mips/index translations/zh_CN/arch/mips/index
+translations/zh_CN/mips/ingenic-tcu translations/zh_CN/arch/mips/ingenic-tcu
+translations/zh_CN/openrisc/index translations/zh_CN/arch/openrisc/index
+translations/zh_CN/openrisc/openrisc_port translations/zh_CN/arch/openrisc/openrisc_port
+translations/zh_CN/openrisc/todo translations/zh_CN/arch/openrisc/todo
+translations/zh_CN/parisc/debugging translations/zh_CN/arch/parisc/debugging
+translations/zh_CN/parisc/index translations/zh_CN/arch/parisc/index
+translations/zh_CN/parisc/registers translations/zh_CN/arch/parisc/registers
+translations/zh_CN/riscv/boot-image-header translations/zh_CN/arch/riscv/boot-image-header
+translations/zh_CN/riscv/index translations/zh_CN/arch/riscv/index
+translations/zh_CN/riscv/patch-acceptance translations/zh_CN/arch/riscv/patch-acceptance
+translations/zh_CN/riscv/vm-layout translations/zh_CN/arch/riscv/vm-layout
+translations/zh_CN/vm/active_mm translations/zh_CN/mm/active_mm
+translations/zh_CN/vm/balance translations/zh_CN/mm/balance
+translations/zh_CN/vm/damon/api translations/zh_CN/mm/damon/api
+translations/zh_CN/vm/damon/design translations/zh_CN/mm/damon/design
+translations/zh_CN/vm/damon/faq translations/zh_CN/mm/damon/faq
+translations/zh_CN/vm/damon/index translations/zh_CN/mm/damon/index
+translations/zh_CN/vm/free_page_reporting translations/zh_CN/mm/free_page_reporting
+translations/zh_CN/vm/highmem translations/zh_CN/mm/highmem
+translations/zh_CN/vm/hmm translations/zh_CN/mm/hmm
+translations/zh_CN/vm/hugetlbfs_reserv translations/zh_CN/mm/hugetlbfs_reserv
+translations/zh_CN/vm/hwpoison translations/zh_CN/mm/hwpoison
+translations/zh_CN/vm/index translations/zh_CN/mm/index
+translations/zh_CN/vm/ksm translations/zh_CN/mm/ksm
+translations/zh_CN/vm/memory-model translations/zh_CN/mm/memory-model
+translations/zh_CN/vm/mmu_notifier translations/zh_CN/mm/mmu_notifier
+translations/zh_CN/vm/numa translations/zh_CN/mm/numa
+translations/zh_CN/vm/overcommit-accounting translations/zh_CN/mm/overcommit-accounting
+translations/zh_CN/vm/page_frags translations/zh_CN/mm/page_frags
+translations/zh_CN/vm/page_owner translations/zh_CN/mm/page_owner
+translations/zh_CN/vm/page_table_check translations/zh_CN/mm/page_table_check
+translations/zh_CN/vm/remap_file_pages translations/zh_CN/mm/remap_file_pages
+translations/zh_CN/vm/split_page_table_lock translations/zh_CN/mm/split_page_table_lock
+translations/zh_CN/vm/zsmalloc translations/zh_CN/mm/zsmalloc
+translations/zh_TW/arm64/amu translations/zh_TW/arch/arm64/amu
+translations/zh_TW/arm64/elf_hwcaps translations/zh_TW/arch/arm64/elf_hwcaps
+translations/zh_TW/arm64/hugetlbpage translations/zh_TW/arch/arm64/hugetlbpage
+translations/zh_TW/arm64/index translations/zh_TW/arch/arm64/index
+translations/zh_TW/arm64/perf translations/zh_TW/arch/arm64/perf
+tty/device_drivers/oxsemi-tornado misc-devices/oxsemi-tornado
+tty/index driver-api/tty/index
+tty/n_tty driver-api/tty/n_tty
+tty/tty_buffer driver-api/tty/tty_buffer
+tty/tty_driver driver-api/tty/tty_driver
+tty/tty_internals driver-api/tty/tty_internals
+tty/tty_ldisc driver-api/tty/tty_ldisc
+tty/tty_port driver-api/tty/tty_port
+tty/tty_struct driver-api/tty/tty_struct
+usb/typec driver-api/usb/typec
+usb/usb3-debug-port driver-api/usb/usb3-debug-port
+userspace-api/media/drivers/st-vgxy61 userspace-api/media/drivers/vgxy61
+userspace-api/media/v4l/pixfmt-meta-d4xx userspace-api/media/v4l/metafmt-d4xx
+userspace-api/media/v4l/pixfmt-meta-intel-ipu3 userspace-api/media/v4l/metafmt-intel-ipu3
+userspace-api/media/v4l/pixfmt-meta-rkisp1 userspace-api/media/v4l/metafmt-rkisp1
+userspace-api/media/v4l/pixfmt-meta-uvc userspace-api/media/v4l/metafmt-uvc
+userspace-api/media/v4l/pixfmt-meta-vivid userspace-api/media/v4l/metafmt-vivid
+userspace-api/media/v4l/pixfmt-meta-vsp1-hgo userspace-api/media/v4l/metafmt-vsp1-hgo
+userspace-api/media/v4l/pixfmt-meta-vsp1-hgt userspace-api/media/v4l/metafmt-vsp1-hgt
+virt/coco/sevguest virt/coco/sev-guest
+virt/kvm/amd-memory-encryption virt/kvm/x86/amd-memory-encryption
+virt/kvm/arm/psci virt/kvm/arm/fw-pseudo-registers
+virt/kvm/cpuid virt/kvm/x86/cpuid
+virt/kvm/hypercalls virt/kvm/x86/hypercalls
+virt/kvm/mmu virt/kvm/x86/mmu
+virt/kvm/msr virt/kvm/x86/msr
+virt/kvm/nested-vmx virt/kvm/x86/nested-vmx
+virt/kvm/running-nested-guests virt/kvm/x86/running-nested-guests
+virt/kvm/s390-diag virt/kvm/s390/s390-diag
+virt/kvm/s390-pv virt/kvm/s390/s390-pv
+virt/kvm/s390-pv-boot virt/kvm/s390/s390-pv-boot
+virt/kvm/timekeeping virt/kvm/x86/timekeeping
+virt/kvm/x86/halt-polling virt/kvm/halt-polling
+virtual/index virt/index
+virtual/kvm/amd-memory-encryption virt/kvm/x86/amd-memory-encryption
+virtual/kvm/cpuid virt/kvm/x86/cpuid
+virtual/kvm/index virt/kvm/index
+virtual/kvm/vcpu-requests virt/kvm/vcpu-requests
+virtual/paravirt_ops virt/paravirt_ops
+vm/active_mm mm/active_mm
+vm/arch_pgtable_helpers mm/arch_pgtable_helpers
+vm/balance mm/balance
+vm/bootmem mm/bootmem
+vm/damon/api mm/damon/api
+vm/damon/design mm/damon/design
+vm/damon/faq mm/damon/faq
+vm/damon/index mm/damon/index
+vm/free_page_reporting mm/free_page_reporting
+vm/highmem mm/highmem
+vm/hmm mm/hmm
+vm/hugetlbfs_reserv mm/hugetlbfs_reserv
+vm/hugetlbpage admin-guide/mm/hugetlbpage
+vm/hwpoison mm/hwpoison
+vm/idle_page_tracking admin-guide/mm/idle_page_tracking
+vm/index mm/index
+vm/ksm mm/ksm
+vm/memory-model mm/memory-model
+vm/mmu_notifier mm/mmu_notifier
+vm/numa mm/numa
+vm/numa_memory_policy admin-guide/mm/numa_memory_policy
+vm/oom mm/oom
+vm/overcommit-accounting mm/overcommit-accounting
+vm/page_allocation mm/page_allocation
+vm/page_cache mm/page_cache
+vm/page_frags mm/page_frags
+vm/page_migration mm/page_migration
+vm/page_owner mm/page_owner
+vm/page_reclaim mm/page_reclaim
+vm/page_table_check mm/page_table_check
+vm/page_tables mm/page_tables
+vm/pagemap admin-guide/mm/pagemap
+vm/physical_memory mm/physical_memory
+vm/process_addrs mm/process_addrs
+vm/remap_file_pages mm/remap_file_pages
+vm/shmfs mm/shmfs
+vm/slab mm/slab
+vm/slub admin-guide/mm/slab
+vm/soft-dirty admin-guide/mm/soft-dirty
+vm/split_page_table_lock mm/split_page_table_lock
+vm/swap mm/swap
+vm/swap_numa admin-guide/mm/swap_numa
+vm/transhuge mm/transhuge
+vm/unevictable-lru mm/unevictable-lru
+vm/userfaultfd admin-guide/mm/userfaultfd
+vm/vmalloc mm/vmalloc
+vm/vmalloced-kernel-stacks mm/vmalloced-kernel-stacks
+vm/vmemmap_dedup mm/vmemmap_dedup
+vm/zsmalloc mm/zsmalloc
+vm/zswap admin-guide/mm/zswap
+watch_queue core-api/watch_queue
+x86/amd-memory-encryption arch/x86/amd-memory-encryption
+x86/amd_hsmp arch/x86/amd_hsmp
+x86/boot arch/x86/boot
+x86/booting-dt arch/x86/booting-dt
+x86/buslock arch/x86/buslock
+x86/cpuinfo arch/x86/cpuinfo
+x86/earlyprintk arch/x86/earlyprintk
+x86/elf_auxvec arch/x86/elf_auxvec
+x86/entry_64 arch/x86/entry_64
+x86/exception-tables arch/x86/exception-tables
+x86/features arch/x86/features
+x86/i386/IO-APIC arch/x86/i386/IO-APIC
+x86/i386/index arch/x86/i386/index
+x86/ifs arch/x86/ifs
+x86/index arch/x86/index
+x86/intel-hfi arch/x86/intel-hfi
+x86/intel_txt arch/x86/intel_txt
+x86/iommu arch/x86/iommu
+x86/kernel-stacks arch/x86/kernel-stacks
+x86/mds arch/x86/mds
+x86/microcode arch/x86/microcode
+x86/mtrr arch/x86/mtrr
+x86/orc-unwinder arch/x86/orc-unwinder
+x86/pat arch/x86/pat
+x86/protection-keys core-api/protection-keys
+x86/pti arch/x86/pti
+x86/resctrl filesystems/resctrl
+x86/resctrl_ui filesystems/resctrl
+x86/sgx arch/x86/sgx
+x86/sva arch/x86/sva
+x86/tdx arch/x86/tdx
+x86/tlb arch/x86/tlb
+x86/topology arch/x86/topology
+x86/tsx_async_abort arch/x86/tsx_async_abort
+x86/usb-legacy-support arch/x86/usb-legacy-support
+x86/x86_64/5level-paging arch/x86/x86_64/5level-paging
+x86/x86_64/cpu-hotplug-spec arch/x86/x86_64/cpu-hotplug-spec
+x86/x86_64/fake-numa-for-cpusets arch/x86/x86_64/fake-numa-for-cpusets
+x86/x86_64/fsgs arch/x86/x86_64/fsgs
+x86/x86_64/index arch/x86/x86_64/index
+x86/x86_64/machinecheck arch/x86/x86_64/machinecheck
+x86/x86_64/mm arch/x86/x86_64/mm
+x86/x86_64/uefi arch/x86/x86_64/uefi
+x86/xstate arch/x86/xstate
+x86/zero-page arch/x86/zero-page
+xilinx/eemi driver-api/xilinx/eemi
+xilinx/index driver-api/xilinx/index
+xtensa/atomctl arch/xtensa/atomctl
+xtensa/booting arch/xtensa/booting
+xtensa/features arch/xtensa/features
+xtensa/index arch/xtensa/index
+xtensa/mmu arch/xtensa/mmu
diff --git a/Documentation/ABI/README b/Documentation/ABI/README
index 8bac9cb09a6d..315fffe1f831 100644
--- a/Documentation/ABI/README
+++ b/Documentation/ABI/README
@@ -1,4 +1,5 @@
-This directory attempts to document the ABI between the Linux kernel and
+This part of the documentation inside Documentation/ABI directory
+attempts to document the ABI between the Linux kernel and
userspace, and the relative stability of these interfaces. Due to the
everchanging nature of Linux, and the differing maturity levels, these
interfaces should be used by userspace programs in different ways.
@@ -45,7 +46,9 @@ Every file in these directories will contain the following information:
What: Short description of the interface
Date: Date created
-KernelVersion: Kernel version this feature first showed up in.
+KernelVersion: (Optional) Kernel version this feature first showed up in.
+ Note: git history often provides more accurate version
+ info, so this field may be omitted.
Contact: Primary contact for this interface (may be a mailing list)
Description: Long description of the interface and how to use it.
Users: All users of this interface who wish to be notified when
diff --git a/Documentation/ABI/obsolete/automount-tracefs-debugfs b/Documentation/ABI/obsolete/automount-tracefs-debugfs
new file mode 100644
index 000000000000..a5196ec78cb5
--- /dev/null
+++ b/Documentation/ABI/obsolete/automount-tracefs-debugfs
@@ -0,0 +1,20 @@
+What: /sys/kernel/debug/tracing
+Date: May 2008
+KernelVersion: 2.6.27
+Contact: linux-trace-kernel@vger.kernel.org
+Description:
+
+ The ftrace was first added to the kernel, its interface was placed
+ into the debugfs file system under the "tracing" directory. Access
+ to the files were in /sys/kernel/debug/tracing. As systems wanted
+ access to the tracing interface without having to enable debugfs, a
+ new interface was created called "tracefs". This was a stand alone
+ file system and was usually mounted in /sys/kernel/tracing.
+
+ To allow older tooling to continue to operate, when mounting
+ debugfs, the tracefs file system would automatically get mounted in
+ the "tracing" directory of debugfs. The tracefs interface was added
+ in January 2015 in the v4.1 kernel.
+
+ All tooling should now be using tracefs directly and the "tracing"
+ directory in debugfs should be removed by January 2030.
diff --git a/Documentation/ABI/obsolete/sysfs-bus-iio b/Documentation/ABI/obsolete/sysfs-bus-iio
index b64394b0b374..a13523561958 100644
--- a/Documentation/ABI/obsolete/sysfs-bus-iio
+++ b/Documentation/ABI/obsolete/sysfs-bus-iio
@@ -48,10 +48,6 @@ What: /sys/.../iio:deviceX/scan_elements/in_timestamp_en
What: /sys/.../iio:deviceX/scan_elements/in_voltageY_supply_en
What: /sys/.../iio:deviceX/scan_elements/in_voltageY_en
What: /sys/.../iio:deviceX/scan_elements/in_voltageY-voltageZ_en
-What: /sys/.../iio:deviceX/scan_elements/in_voltageY_i_en
-What: /sys/.../iio:deviceX/scan_elements/in_voltageY_q_en
-What: /sys/.../iio:deviceX/scan_elements/in_voltage_i_en
-What: /sys/.../iio:deviceX/scan_elements/in_voltage_q_en
What: /sys/.../iio:deviceX/scan_elements/in_incli_x_en
What: /sys/.../iio:deviceX/scan_elements/in_incli_y_en
What: /sys/.../iio:deviceX/scan_elements/in_pressureY_en
@@ -73,10 +69,6 @@ What: /sys/.../iio:deviceX/scan_elements/in_incli_type
What: /sys/.../iio:deviceX/scan_elements/in_voltageY_type
What: /sys/.../iio:deviceX/scan_elements/in_voltage_type
What: /sys/.../iio:deviceX/scan_elements/in_voltageY_supply_type
-What: /sys/.../iio:deviceX/scan_elements/in_voltageY_i_type
-What: /sys/.../iio:deviceX/scan_elements/in_voltageY_q_type
-What: /sys/.../iio:deviceX/scan_elements/in_voltage_i_type
-What: /sys/.../iio:deviceX/scan_elements/in_voltage_q_type
What: /sys/.../iio:deviceX/scan_elements/in_timestamp_type
What: /sys/.../iio:deviceX/scan_elements/in_pressureY_type
What: /sys/.../iio:deviceX/scan_elements/in_pressure_type
@@ -110,10 +102,6 @@ Description:
What: /sys/.../iio:deviceX/scan_elements/in_voltageY_index
What: /sys/.../iio:deviceX/scan_elements/in_voltageY_supply_index
-What: /sys/.../iio:deviceX/scan_elements/in_voltageY_i_index
-What: /sys/.../iio:deviceX/scan_elements/in_voltageY_q_index
-What: /sys/.../iio:deviceX/scan_elements/in_voltage_i_index
-What: /sys/.../iio:deviceX/scan_elements/in_voltage_q_index
What: /sys/.../iio:deviceX/scan_elements/in_accel_x_index
What: /sys/.../iio:deviceX/scan_elements/in_accel_y_index
What: /sys/.../iio:deviceX/scan_elements/in_accel_z_index
diff --git a/Documentation/ABI/obsolete/sysfs-driver-samsung-laptop b/Documentation/ABI/obsolete/sysfs-driver-samsung-laptop
new file mode 100644
index 000000000000..204c3f3a1d78
--- /dev/null
+++ b/Documentation/ABI/obsolete/sysfs-driver-samsung-laptop
@@ -0,0 +1,10 @@
+What: /sys/devices/platform/samsung/battery_life_extender
+Date: December 1, 2011
+KernelVersion: 3.3
+Contact: Corentin Chary <corentin.chary@gmail.com>
+Description: Max battery charge level can be modified, battery cycle
+ life can be extended by reducing the max battery charge
+ level.
+
+ - 0 means normal battery mode (100% charge)
+ - 1 means battery life extender mode (80% charge)
diff --git a/Documentation/ABI/obsolete/sysfs-gpio b/Documentation/ABI/obsolete/sysfs-gpio
index b8b0fd341c17..0d3f12c4dcbd 100644
--- a/Documentation/ABI/obsolete/sysfs-gpio
+++ b/Documentation/ABI/obsolete/sysfs-gpio
@@ -19,14 +19,22 @@ Description:
/export ... asks the kernel to export a GPIO to userspace
/unexport ... to return a GPIO to the kernel
/gpioN ... for each exported GPIO #N OR
- /<LINE-NAME> ... for a properly named GPIO line
/value ... always readable, writes fail for input GPIOs
/direction ... r/w as: in, out (default low); write: high, low
/edge ... r/w as: none, falling, rising, both
+ /active_low ... r/w as: 0, 1
/gpiochipN ... for each gpiochip; #N is its first GPIO
/base ... (r/o) same as N
- /label ... (r/o) descriptive, not necessarily unique
+ /label ... (r/o) descriptive chip name
/ngpio ... (r/o) number of GPIOs; numbered N to N + (ngpio - 1)
+ /gpio<OFFSET>
+ /value ... always readable, writes fail for input GPIOs
+ /direction ... r/w as: in, out (default low); write: high, low
+ /chipX ... for each gpiochip; #X is the gpio device ID
+ /export ... asks the kernel to export a GPIO at HW offset X to userspace
+ /unexport ... to return a GPIO at HW offset X to the kernel
+ /label ... (r/o) descriptive chip name
+ /ngpio ... (r/o) number of GPIOs exposed by the chip
- This ABI is deprecated and will be removed after 2020. It is
- replaced with the GPIO character device.
+ This ABI is obsoleted by Documentation/ABI/testing/gpio-cdev and will be
+ removed after 2020.
diff --git a/Documentation/ABI/obsolete/sysfs-kernel-kexec-kdump b/Documentation/ABI/obsolete/sysfs-kernel-kexec-kdump
new file mode 100644
index 000000000000..ba26a6a1d2be
--- /dev/null
+++ b/Documentation/ABI/obsolete/sysfs-kernel-kexec-kdump
@@ -0,0 +1,71 @@
+NOTE: all the ABIs listed in this file are deprecated and will be removed after 2028.
+
+Here are the alternative ABIs:
++------------------------------------+-----------------------------------------+
+| Deprecated | Alternative |
++------------------------------------+-----------------------------------------+
+| /sys/kernel/kexec_loaded | /sys/kernel/kexec/loaded |
++------------------------------------+-----------------------------------------+
+| /sys/kernel/kexec_crash_loaded | /sys/kernel/kexec/crash_loaded |
++------------------------------------+-----------------------------------------+
+| /sys/kernel/kexec_crash_size | /sys/kernel/kexec/crash_size |
++------------------------------------+-----------------------------------------+
+| /sys/kernel/crash_elfcorehdr_size | /sys/kernel/kexec/crash_elfcorehdr_size |
++------------------------------------+-----------------------------------------+
+| /sys/kernel/kexec_crash_cma_ranges | /sys/kernel/kexec/crash_cma_ranges |
++------------------------------------+-----------------------------------------+
+
+
+What: /sys/kernel/kexec_loaded
+Date: Jun 2006
+Contact: kexec@lists.infradead.org
+Description: read only
+ Indicates whether a new kernel image has been loaded
+ into memory using the kexec system call. It shows 1 if
+ a kexec image is present and ready to boot, or 0 if none
+ is loaded.
+User: kexec tools, kdump service
+
+What: /sys/kernel/kexec_crash_loaded
+Date: Jun 2006
+Contact: kexec@lists.infradead.org
+Description: read only
+ Indicates whether a crash (kdump) kernel is currently
+ loaded into memory. It shows 1 if a crash kernel has been
+ successfully loaded for panic handling, or 0 if no crash
+ kernel is present.
+User: Kexec tools, Kdump service
+
+What: /sys/kernel/kexec_crash_size
+Date: Dec 2009
+Contact: kexec@lists.infradead.org
+Description: read/write
+ Shows the amount of memory reserved for loading the crash
+ (kdump) kernel. It reports the size, in bytes, of the
+ crash kernel area defined by the crashkernel= parameter.
+ This interface also allows reducing the crashkernel
+ reservation by writing a smaller value, and the reclaimed
+ space is added back to the system RAM.
+User: Kdump service
+
+What: /sys/kernel/crash_elfcorehdr_size
+Date: Aug 2023
+Contact: kexec@lists.infradead.org
+Description: read only
+ Indicates the preferred size of the memory buffer for the
+ ELF core header used by the crash (kdump) kernel. It defines
+ how much space is needed to hold metadata about the crashed
+ system, including CPU and memory information. This information
+ is used by the user space utility kexec to support updating the
+ in-kernel kdump image during hotplug operations.
+User: Kexec tools
+
+What: /sys/kernel/kexec_crash_cma_ranges
+Date: Nov 2025
+Contact: kexec@lists.infradead.org
+Description: read only
+ Provides information about the memory ranges reserved from
+ the Contiguous Memory Allocator (CMA) area that are allocated
+ to the crash (kdump) kernel. It lists the start and end physical
+ addresses of CMA regions assigned for crashkernel use.
+User: kdump service
diff --git a/Documentation/ABI/obsolete/sysfs-platform-ideapad-laptop b/Documentation/ABI/obsolete/sysfs-platform-ideapad-laptop
new file mode 100644
index 000000000000..c1dbd19c679c
--- /dev/null
+++ b/Documentation/ABI/obsolete/sysfs-platform-ideapad-laptop
@@ -0,0 +1,8 @@
+What: /sys/bus/platform/devices/VPC2004:*/conservation_mode
+Date: Aug 2017
+KernelVersion: 4.14
+Contact: platform-driver-x86@vger.kernel.org
+Description:
+ Controls whether the conservation mode is enabled or not.
+ This feature limits the maximum battery charge percentage to
+ around 50-60% in order to prolong the lifetime of the battery.
diff --git a/Documentation/ABI/obsolete/sysfs-selinux-user b/Documentation/ABI/obsolete/sysfs-selinux-user
new file mode 100644
index 000000000000..8ab7557f283f
--- /dev/null
+++ b/Documentation/ABI/obsolete/sysfs-selinux-user
@@ -0,0 +1,12 @@
+What: /sys/fs/selinux/user
+Date: April 2005 (predates git)
+KernelVersion: 2.6.12-rc2 (predates git)
+Contact: selinux@vger.kernel.org
+Description:
+
+ The selinuxfs "user" node allows userspace to request a list
+ of security contexts that can be reached for a given SELinux
+ user from a given starting context. This was used by libselinux
+ when various login-style programs requested contexts for
+ users, but libselinux stopped using it in 2020.
+ Kernel support will be removed no sooner than Dec 2025.
diff --git a/Documentation/ABI/testing/sysfs-class-cxl b/Documentation/ABI/removed/sysfs-class-cxl
index cfc48a87706b..266c413b96e8 100644
--- a/Documentation/ABI/testing/sysfs-class-cxl
+++ b/Documentation/ABI/removed/sysfs-class-cxl
@@ -1,3 +1,5 @@
+The cxl driver was removed in 6.15.
+
Please note that attributes that are shared between devices are stored in
the directory pointed to by the symlink device/.
For example, the real path of the attribute /sys/class/cxl/afu0.0s/irqs_max is
@@ -7,7 +9,7 @@ For example, the real path of the attribute /sys/class/cxl/afu0.0s/irqs_max is
Slave contexts (eg. /sys/class/cxl/afu0.0s):
What: /sys/class/cxl/<afu>/afu_err_buf
-Date: September 2014
+Date: September 2014, removed February 2025
Contact: linuxppc-dev@lists.ozlabs.org
Description: read only
AFU Error Buffer contents. The contents of this file are
@@ -18,7 +20,7 @@ Description: read only
What: /sys/class/cxl/<afu>/irqs_max
-Date: September 2014
+Date: September 2014, removed February 2025
Contact: linuxppc-dev@lists.ozlabs.org
Description: read/write
Decimal value of maximum number of interrupts that can be
@@ -29,7 +31,7 @@ Description: read/write
Users: https://github.com/ibm-capi/libcxl
What: /sys/class/cxl/<afu>/irqs_min
-Date: September 2014
+Date: September 2014, removed February 2025
Contact: linuxppc-dev@lists.ozlabs.org
Description: read only
Decimal value of the minimum number of interrupts that
@@ -39,7 +41,7 @@ Description: read only
Users: https://github.com/ibm-capi/libcxl
What: /sys/class/cxl/<afu>/mmio_size
-Date: September 2014
+Date: September 2014, removed February 2025
Contact: linuxppc-dev@lists.ozlabs.org
Description: read only
Decimal value of the size of the MMIO space that may be mmapped
@@ -47,7 +49,7 @@ Description: read only
Users: https://github.com/ibm-capi/libcxl
What: /sys/class/cxl/<afu>/modes_supported
-Date: September 2014
+Date: September 2014, removed February 2025
Contact: linuxppc-dev@lists.ozlabs.org
Description: read only
List of the modes this AFU supports. One per line.
@@ -55,7 +57,7 @@ Description: read only
Users: https://github.com/ibm-capi/libcxl
What: /sys/class/cxl/<afu>/mode
-Date: September 2014
+Date: September 2014, removed February 2025
Contact: linuxppc-dev@lists.ozlabs.org
Description: read/write
The current mode the AFU is using. Will be one of the modes
@@ -65,7 +67,7 @@ Users: https://github.com/ibm-capi/libcxl
What: /sys/class/cxl/<afu>/prefault_mode
-Date: September 2014
+Date: September 2014, removed February 2025
Contact: linuxppc-dev@lists.ozlabs.org
Description: read/write
Set the mode for prefaulting in segments into the segment table
@@ -85,7 +87,7 @@ Description: read/write
Users: https://github.com/ibm-capi/libcxl
What: /sys/class/cxl/<afu>/reset
-Date: September 2014
+Date: September 2014, removed February 2025
Contact: linuxppc-dev@lists.ozlabs.org
Description: write only
Writing 1 here will reset the AFU provided there are not
@@ -93,14 +95,14 @@ Description: write only
Users: https://github.com/ibm-capi/libcxl
What: /sys/class/cxl/<afu>/api_version
-Date: September 2014
+Date: September 2014, removed February 2025
Contact: linuxppc-dev@lists.ozlabs.org
Description: read only
Decimal value of the current version of the kernel/user API.
Users: https://github.com/ibm-capi/libcxl
What: /sys/class/cxl/<afu>/api_version_compatible
-Date: September 2014
+Date: September 2014, removed February 2025
Contact: linuxppc-dev@lists.ozlabs.org
Description: read only
Decimal value of the lowest version of the userspace API
@@ -114,7 +116,7 @@ An AFU may optionally export one or more PCIe like configuration records, known
as AFU configuration records, which will show up here (if present).
What: /sys/class/cxl/<afu>/cr<config num>/vendor
-Date: February 2015
+Date: February 2015, removed February 2025
Contact: linuxppc-dev@lists.ozlabs.org
Description: read only
Hexadecimal value of the vendor ID found in this AFU
@@ -122,7 +124,7 @@ Description: read only
Users: https://github.com/ibm-capi/libcxl
What: /sys/class/cxl/<afu>/cr<config num>/device
-Date: February 2015
+Date: February 2015, removed February 2025
Contact: linuxppc-dev@lists.ozlabs.org
Description: read only
Hexadecimal value of the device ID found in this AFU
@@ -130,7 +132,7 @@ Description: read only
Users: https://github.com/ibm-capi/libcxl
What: /sys/class/cxl/<afu>/cr<config num>/class
-Date: February 2015
+Date: February 2015, removed February 2025
Contact: linuxppc-dev@lists.ozlabs.org
Description: read only
Hexadecimal value of the class code found in this AFU
@@ -138,7 +140,7 @@ Description: read only
Users: https://github.com/ibm-capi/libcxl
What: /sys/class/cxl/<afu>/cr<config num>/config
-Date: February 2015
+Date: February 2015, removed February 2025
Contact: linuxppc-dev@lists.ozlabs.org
Description: read only
This binary file provides raw access to the AFU configuration
@@ -152,7 +154,7 @@ Users: https://github.com/ibm-capi/libcxl
Master contexts (eg. /sys/class/cxl/afu0.0m)
What: /sys/class/cxl/<afu>m/mmio_size
-Date: September 2014
+Date: September 2014, removed February 2025
Contact: linuxppc-dev@lists.ozlabs.org
Description: read only
Decimal value of the size of the MMIO space that may be mmapped
@@ -160,14 +162,14 @@ Description: read only
Users: https://github.com/ibm-capi/libcxl
What: /sys/class/cxl/<afu>m/pp_mmio_len
-Date: September 2014
+Date: September 2014, removed February 2025
Contact: linuxppc-dev@lists.ozlabs.org
Description: read only
Decimal value of the Per Process MMIO space length.
Users: https://github.com/ibm-capi/libcxl
What: /sys/class/cxl/<afu>m/pp_mmio_off
-Date: September 2014
+Date: September 2014, removed February 2025
Contact: linuxppc-dev@lists.ozlabs.org
Description: read only
(not in a guest)
@@ -178,21 +180,21 @@ Users: https://github.com/ibm-capi/libcxl
Card info (eg. /sys/class/cxl/card0)
What: /sys/class/cxl/<card>/caia_version
-Date: September 2014
+Date: September 2014, removed February 2025
Contact: linuxppc-dev@lists.ozlabs.org
Description: read only
Identifies the CAIA Version the card implements.
Users: https://github.com/ibm-capi/libcxl
What: /sys/class/cxl/<card>/psl_revision
-Date: September 2014
+Date: September 2014, removed February 2025
Contact: linuxppc-dev@lists.ozlabs.org
Description: read only
Identifies the revision level of the PSL.
Users: https://github.com/ibm-capi/libcxl
What: /sys/class/cxl/<card>/base_image
-Date: September 2014
+Date: September 2014, removed February 2025
Contact: linuxppc-dev@lists.ozlabs.org
Description: read only
(not in a guest)
@@ -203,7 +205,7 @@ Description: read only
Users: https://github.com/ibm-capi/libcxl
What: /sys/class/cxl/<card>/image_loaded
-Date: September 2014
+Date: September 2014, removed February 2025
Contact: linuxppc-dev@lists.ozlabs.org
Description: read only
(not in a guest)
@@ -212,7 +214,7 @@ Description: read only
Users: https://github.com/ibm-capi/libcxl
What: /sys/class/cxl/<card>/load_image_on_perst
-Date: December 2014
+Date: December 2014, removed February 2025
Contact: linuxppc-dev@lists.ozlabs.org
Description: read/write
(not in a guest)
@@ -229,7 +231,7 @@ Description: read/write
Users: https://github.com/ibm-capi/libcxl
What: /sys/class/cxl/<card>/reset
-Date: October 2014
+Date: October 2014, removed February 2025
Contact: linuxppc-dev@lists.ozlabs.org
Description: write only
Writing 1 will issue a PERST to card provided there are no
@@ -240,7 +242,7 @@ Description: write only
Users: https://github.com/ibm-capi/libcxl
What: /sys/class/cxl/<card>/perst_reloads_same_image
-Date: July 2015
+Date: July 2015, removed February 2025
Contact: linuxppc-dev@lists.ozlabs.org
Description: read/write
(not in a guest)
@@ -254,7 +256,7 @@ Description: read/write
Users: https://github.com/ibm-capi/libcxl
What: /sys/class/cxl/<card>/psl_timebase_synced
-Date: March 2016
+Date: March 2016, removed February 2025
Contact: linuxppc-dev@lists.ozlabs.org
Description: read only
Returns 1 if the psl timebase register is synchronized
@@ -262,7 +264,7 @@ Description: read only
Users: https://github.com/ibm-capi/libcxl
What: /sys/class/cxl/<card>/tunneled_ops_supported
-Date: May 2018
+Date: May 2018, removed February 2025
Contact: linuxppc-dev@lists.ozlabs.org
Description: read only
Returns 1 if tunneled operations are supported in capi mode,
diff --git a/Documentation/ABI/removed/sysfs-class-rfkill b/Documentation/ABI/removed/sysfs-class-rfkill
index f25174eafd55..20cb688af173 100644
--- a/Documentation/ABI/removed/sysfs-class-rfkill
+++ b/Documentation/ABI/removed/sysfs-class-rfkill
@@ -4,7 +4,7 @@ For details to this subsystem look at Documentation/driver-api/rfkill.rst.
What: /sys/class/rfkill/rfkill[0-9]+/claim
Date: 09-Jul-2007
-KernelVersion v2.6.22
+KernelVersion: v2.6.22
Contact: linux-wireless@vger.kernel.org
Description: This file was deprecated because there no longer was a way to
claim just control over a single rfkill instance.
diff --git a/Documentation/ABI/removed/sysfs-firmware-efi-vars b/Documentation/ABI/removed/sysfs-firmware-efi-vars
new file mode 100644
index 000000000000..8d97368b149b
--- /dev/null
+++ b/Documentation/ABI/removed/sysfs-firmware-efi-vars
@@ -0,0 +1,12 @@
+What: /sys/firmware/efi/vars
+Date: April 2004, removed March 2023
+Description:
+ This directory exposed interfaces for interacting with
+ EFI variables. For more information on EFI variables,
+ see 'Variable Services' in the UEFI specification
+ (section 7.2 in specification version 2.3 Errata D).
+
+ The 'efivars' sysfs interface was removed in March of 2023,
+ after being considered deprecated no later than September
+ of 2020. Its functionality has been replaced by the
+ 'efivarfs' filesystem.
diff --git a/Documentation/ABI/stable/sysfs-block b/Documentation/ABI/stable/sysfs-block
index 1fe9a553c37b..0ed10aeff86b 100644
--- a/Documentation/ABI/stable/sysfs-block
+++ b/Documentation/ABI/stable/sysfs-block
@@ -21,10 +21,63 @@ Description:
device is offset from the internal allocation unit's
natural alignment.
+What: /sys/block/<disk>/atomic_write_max_bytes
+Date: February 2024
+Contact: Himanshu Madhani <himanshu.madhani@oracle.com>
+Description:
+ [RO] This parameter specifies the maximum atomic write
+ size reported by the device. This parameter is relevant
+ for merging of writes, where a merged atomic write
+ operation must not exceed this number of bytes.
+ This parameter may be greater than the value in
+ atomic_write_unit_max_bytes as
+ atomic_write_unit_max_bytes will be rounded down to a
+ power-of-two and atomic_write_unit_max_bytes may also be
+ limited by some other queue limits, such as max_segments.
+ This parameter - along with atomic_write_unit_min_bytes
+ and atomic_write_unit_max_bytes - will not be larger than
+ max_hw_sectors_kb, but may be larger than max_sectors_kb.
+
+
+What: /sys/block/<disk>/atomic_write_unit_min_bytes
+Date: February 2024
+Contact: Himanshu Madhani <himanshu.madhani@oracle.com>
+Description:
+ [RO] This parameter specifies the smallest block which can
+ be written atomically with an atomic write operation. All
+ atomic write operations must begin at a
+ atomic_write_unit_min boundary and must be multiples of
+ atomic_write_unit_min. This value must be a power-of-two.
+
+
+What: /sys/block/<disk>/atomic_write_unit_max_bytes
+Date: February 2024
+Contact: Himanshu Madhani <himanshu.madhani@oracle.com>
+Description:
+ [RO] This parameter defines the largest block which can be
+ written atomically with an atomic write operation. This
+ value must be a multiple of atomic_write_unit_min and must
+ be a power-of-two. This value will not be larger than
+ atomic_write_max_bytes.
+
+
+What: /sys/block/<disk>/atomic_write_boundary_bytes
+Date: February 2024
+Contact: Himanshu Madhani <himanshu.madhani@oracle.com>
+Description:
+ [RO] A device may need to internally split an atomic write I/O
+ which straddles a given logical block address boundary. This
+ parameter specifies the size in bytes of the atomic boundary if
+ one is reported by the device. This value must be a
+ power-of-two and at least the size as in
+ atomic_write_unit_max_bytes.
+ Any attempt to merge atomic write I/Os must not result in a
+ merged I/O which crosses this boundary (if any).
+
What: /sys/block/<disk>/diskseq
Date: February 2021
-Contact: Matteo Croce <mcroce@microsoft.com>
+Contact: Matteo Croce <teknoraver@meta.com>
Description:
The /sys/block/<disk>/diskseq files reports the disk
sequence number, which is a monotonically increasing
@@ -56,6 +109,10 @@ Contact: Martin K. Petersen <martin.petersen@oracle.com>
Description:
Indicates whether a storage device is capable of storing
integrity metadata. Set if the device is T10 PI-capable.
+ This flag is set to 1 if the storage media is formatted
+ with T10 Protection Information. If the storage media is
+ not formatted with T10 Protection Information, this flag
+ is set to 0.
What: /sys/block/<disk>/integrity/format
@@ -64,6 +121,13 @@ Contact: Martin K. Petersen <martin.petersen@oracle.com>
Description:
Metadata format for integrity capable block device.
E.g. T10-DIF-TYPE1-CRC.
+ This field describes the type of T10 Protection Information
+ that the block device can send and receive.
+ If the device can store application integrity metadata but
+ no T10 Protection Information profile is used, this field
+ contains "nop".
+ If the device does not support integrity metadata, this
+ field contains "none".
What: /sys/block/<disk>/integrity/protection_interval_bytes
@@ -89,7 +153,17 @@ Date: June 2008
Contact: Martin K. Petersen <martin.petersen@oracle.com>
Description:
Number of bytes of integrity tag space available per
- 512 bytes of data.
+ protection_interval_bytes, which is typically
+ the device's logical block size.
+ This field describes the size of the application tag
+ if the storage device is formatted with T10 Protection
+ Information and permits use of the application tag.
+ The tag_size is reported in bytes and indicates the
+ space available for adding an opaque tag to each block
+ (protection_interval_bytes).
+ If the device does not support T10 Protection Information
+ (even if the device provides application integrity
+ metadata space), this field is set to 0.
What: /sys/block/<disk>/integrity/write_generate
@@ -101,6 +175,16 @@ Description:
devices that support receiving integrity metadata.
+What: /sys/block/<disk>/partscan
+Date: May 2024
+Contact: Christoph Hellwig <hch@lst.de>
+Description:
+ The /sys/block/<disk>/partscan files reports if partition
+ scanning is enabled for the disk. It returns "1" if partition
+ scanning is enabled, or "0" if not. The value type is a 32-bit
+ unsigned integer, but only "0" and "1" are valid values.
+
+
What: /sys/block/<disk>/<partition>/alignment_offset
Date: April 2009
Contact: Martin K. Petersen <martin.petersen@oracle.com>
@@ -166,6 +250,17 @@ Description:
encryption, refer to Documentation/block/inline-encryption.rst.
+What: /sys/block/<disk>/queue/crypto/hw_wrapped_keys
+Date: February 2025
+Contact: linux-block@vger.kernel.org
+Description:
+ [RO] The presence of this file indicates that the device
+ supports hardware-wrapped inline encryption keys, i.e. key blobs
+ that can only be unwrapped and used by dedicated hardware. For
+ more information about hardware-wrapped inline encryption keys,
+ see Documentation/block/inline-encryption.rst.
+
+
What: /sys/block/<disk>/queue/crypto/max_dun_bits
Date: February 2022
Contact: linux-block@vger.kernel.org
@@ -204,6 +299,15 @@ Description:
use with inline encryption.
+What: /sys/block/<disk>/queue/crypto/raw_keys
+Date: February 2025
+Contact: linux-block@vger.kernel.org
+Description:
+ [RO] The presence of this file indicates that the device
+ supports raw inline encryption keys, i.e. keys that are managed
+ in raw, plaintext form in software.
+
+
What: /sys/block/<disk>/queue/dax
Date: June 2016
Contact: linux-block@vger.kernel.org
@@ -361,6 +465,13 @@ Description:
[RW] This file is used to control (on/off) the iostats
accounting of the disk.
+What: /sys/block/<disk>/queue/iostats_passthrough
+Date: October 2024
+Contact: linux-block@vger.kernel.org
+Description:
+ [RW] This file is used to control (on/off) the iostats
+ accounting of the disk for passthrough commands.
+
What: /sys/block/<disk>/queue/logical_block_size
Date: May 2009
@@ -436,6 +547,21 @@ Description:
[RO] Maximum size in bytes of a single element in a DMA
scatter/gather list.
+What: /sys/block/<disk>/queue/max_write_streams
+Date: November 2024
+Contact: linux-block@vger.kernel.org
+Description:
+ [RO] Maximum number of write streams supported, 0 if not
+ supported. If supported, valid values are 1 through
+ max_write_streams, inclusive.
+
+What: /sys/block/<disk>/queue/write_stream_granularity
+Date: November 2024
+Contact: linux-block@vger.kernel.org
+Description:
+ [RO] Granularity of a write stream in bytes. The granularity
+ of a write stream is the size that should be discarded or
+ overwritten together to avoid write amplification in the device.
What: /sys/block/<disk>/queue/max_segments
Date: March 2010
@@ -477,16 +603,10 @@ Date: July 2003
Contact: linux-block@vger.kernel.org
Description:
[RW] This controls how many requests may be allocated in the
- block layer for read or write requests. Note that the total
- allocated number may be twice this amount, since it applies only
- to reads or writes (not the accumulated sum).
-
- To avoid priority inversion through request starvation, a
- request queue maintains a separate request pool per each cgroup
- when CONFIG_BLK_CGROUP is enabled, and this parameter applies to
- each such per-block-cgroup request pool. IOW, if there are N
- block cgroups, each request queue may have up to N request
- pools, each independently regulated by nr_requests.
+ block layer. Noted this value only represents the quantity for a
+ single blk_mq_tags instance. The actual number for the entire
+ device depends on the hardware queue count, whether elevator is
+ enabled, and whether tags are shared.
What: /sys/block/<disk>/queue/nr_zones
@@ -531,6 +651,9 @@ Description:
[RW] Maximum number of kilobytes to read-ahead for filesystems
on this block device.
+ For MADV_HUGEPAGE, the readahead size may exceed this setting
+ since its granularity is based on the hugepage size.
+
What: /sys/block/<disk>/queue/rotational
Date: January 2009
@@ -584,18 +707,6 @@ Description:
the data. If no such restriction exists, this file will contain
'0'. This file is writable for testing purposes.
-
-What: /sys/block/<disk>/queue/throttle_sample_time
-Date: March 2017
-Contact: linux-block@vger.kernel.org
-Description:
- [RW] This is the time window that blk-throttle samples data, in
- millisecond. blk-throttle makes decision based on the
- samplings. Lower time means cgroups have more smooth throughput,
- but higher CPU overhead. This exists only when
- CONFIG_BLK_DEV_THROTTLING_LOW is enabled.
-
-
What: /sys/block/<disk>/queue/virt_boundary_mask
Date: April 2021
Contact: linux-block@vger.kernel.org
@@ -614,7 +725,7 @@ Contact: linux-block@vger.kernel.org
Description:
[RW] If the device is registered for writeback throttling, then
this file shows the target minimum read latency. If this latency
- is exceeded in a given window of time (see wb_window_usec), then
+ is exceeded in a given window of time (see curr_win_nsec), then
the writeback throttling will start scaling back writes. Writing
a value of '0' to this file disables the feature. Writing a
value of '-1' to this file resets the value to the default
@@ -661,6 +772,39 @@ Description:
0, write zeroes is not supported by the device.
+What: /sys/block/<disk>/queue/write_zeroes_unmap_max_hw_bytes
+Date: January 2025
+Contact: Zhang Yi <yi.zhang@huawei.com>
+Description:
+ [RO] This file indicates whether a device supports zeroing data
+ in a specified block range without incurring the cost of
+ physically writing zeroes to the media for each individual
+ block. If this parameter is set to write_zeroes_max_bytes, the
+ device implements a zeroing operation which opportunistically
+ avoids writing zeroes to media while still guaranteeing that
+ subsequent reads from the specified block range will return
+ zeroed data. This operation is a best-effort optimization, a
+ device may fall back to physically writing zeroes to the media
+ due to other factors such as misalignment or being asked to
+ clear a block range smaller than the device's internal
+ allocation unit. If this parameter is set to 0, the device may
+ have to write each logical block media during a zeroing
+ operation.
+
+
+What: /sys/block/<disk>/queue/write_zeroes_unmap_max_bytes
+Date: January 2025
+Contact: Zhang Yi <yi.zhang@huawei.com>
+Description:
+ [RW] While write_zeroes_unmap_max_hw_bytes is the hardware limit
+ for the device, this setting is the software limit. Since the
+ unmap write zeroes operation is a best-effort optimization, some
+ devices may still physically writing zeroes to media. So the
+ speed of this operation is not guaranteed. Writing a value of
+ '0' to this file disables this operation. Otherwise, this
+ parameter should be equal to write_zeroes_unmap_max_hw_bytes.
+
+
What: /sys/block/<disk>/queue/zone_append_max_bytes
Date: May 2020
Contact: linux-block@vger.kernel.org
diff --git a/Documentation/ABI/stable/sysfs-bus-mhi b/Documentation/ABI/stable/sysfs-bus-mhi
index 1a47f9e0cc84..8b9698fa0beb 100644
--- a/Documentation/ABI/stable/sysfs-bus-mhi
+++ b/Documentation/ABI/stable/sysfs-bus-mhi
@@ -29,3 +29,16 @@ Description: Initiates a SoC reset on the MHI controller. A SoC reset is
This can be useful as a method of recovery if the device is
non-responsive, or as a means of loading new firmware as a
system administration task.
+
+What: /sys/bus/mhi/devices/.../trigger_edl
+Date: April 2024
+KernelVersion: 6.10
+Contact: mhi@lists.linux.dev
+Description: Writing a non-zero value to this file will force devices to
+ enter EDL (Emergency Download) mode. This entry only exists for
+ devices capable of entering the EDL mode using the standard EDL
+ triggering mechanism defined in the MHI spec v1.2. Once in EDL
+ mode, the flash programmer image can be downloaded to the
+ device to enter the flash programmer execution environment.
+ This can be useful if user wants to use QDL (Qualcomm Download,
+ which is used to download firmware over EDL) to update firmware.
diff --git a/Documentation/ABI/stable/sysfs-bus-nvmem b/Documentation/ABI/stable/sysfs-bus-nvmem
index c399323f37de..0ae8cb074acf 100644
--- a/Documentation/ABI/stable/sysfs-bus-nvmem
+++ b/Documentation/ABI/stable/sysfs-bus-nvmem
@@ -1,6 +1,23 @@
+What: /sys/bus/nvmem/devices/.../force_ro
+Date: June 2024
+KernelVersion: 6.11
+Contact: Marek Vasut <marex@denx.de>
+Description:
+ This read/write attribute allows users to set read-write
+ devices as read-only and back to read-write from userspace.
+ This can be used to unlock and relock write-protection of
+ devices which are generally locked, except during sporadic
+ programming operation.
+ Read returns '0' or '1' for read-write or read-only modes
+ respectively.
+ Write parses one of 'YyTt1NnFf0', or [oO][NnFf] for "on"
+ and "off", i.e. what kstrtobool() supports.
+ Note: This file is only present if CONFIG_NVMEM_SYSFS
+ is enabled.
+
What: /sys/bus/nvmem/devices/.../nvmem
Date: July 2015
-KernelVersion: 4.2
+KernelVersion: 4.2
Contact: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Description:
This file allows user to read/write the raw NVMEM contents.
@@ -20,3 +37,14 @@ Description:
...
*
0001000
+
+What: /sys/bus/nvmem/devices/.../type
+Date: November 2018
+KernelVersion: 5.0
+Contact: Alexandre Belloni <alexandre.belloni@bootlin.com>
+Description:
+ This read-only attribute allows user to read the NVMEM
+ device type. Supported types are "Unknown", "EEPROM",
+ "OTP", "Battery backed", "FRAM".
+ Note: This file is only present if CONFIG_NVMEM_SYSFS
+ is enabled.
diff --git a/Documentation/ABI/stable/sysfs-class-backlight b/Documentation/ABI/stable/sysfs-class-backlight
index 023fb52645f8..40b8c46b95b2 100644
--- a/Documentation/ABI/stable/sysfs-class-backlight
+++ b/Documentation/ABI/stable/sysfs-class-backlight
@@ -3,10 +3,11 @@ Date: April 2005
KernelVersion: 2.6.12
Contact: Richard Purdie <rpurdie@rpsys.net>
Description:
- Control BACKLIGHT power, values are FB_BLANK_* from fb.h
+ Control BACKLIGHT power, values are compatible with
+ FB_BLANK_* from fb.h
- - FB_BLANK_UNBLANK (0) : power on.
- - FB_BLANK_POWERDOWN (4) : power off
+ - 0 (FB_BLANK_UNBLANK) : power on.
+ - 4 (FB_BLANK_POWERDOWN) : power off
Users: HAL
What: /sys/class/backlight/<backlight>/brightness
@@ -25,7 +26,12 @@ Date: March 2006
KernelVersion: 2.6.17
Contact: Richard Purdie <rpurdie@rpsys.net>
Description:
- Show the actual brightness by querying the hardware.
+ Show the actual brightness by querying the hardware. Due
+ to implementation differences in hardware this may not
+ match the value in 'brightness'. For example some hardware
+ may treat blanking differently or have custom power saving
+ features. Userspace should generally use the values in
+ 'brightness' to make decisions.
Users: HAL
What: /sys/class/backlight/<backlight>/max_brightness
diff --git a/Documentation/ABI/stable/sysfs-class-bluetooth b/Documentation/ABI/stable/sysfs-class-bluetooth
new file mode 100644
index 000000000000..36be02471174
--- /dev/null
+++ b/Documentation/ABI/stable/sysfs-class-bluetooth
@@ -0,0 +1,9 @@
+What: /sys/class/bluetooth/hci<index>/reset
+Date: 14-Jan-2025
+KernelVersion: 6.13
+Contact: linux-bluetooth@vger.kernel.org
+Description: This write-only attribute allows users to trigger the vendor reset
+ method on the Bluetooth device when arbitrary data is written.
+ The reset may or may not be done through the device transport
+ (e.g., UART/USB), and can also be done through an out-of-band
+ approach such as GPIO.
diff --git a/Documentation/ABI/stable/sysfs-class-rfkill b/Documentation/ABI/stable/sysfs-class-rfkill
index 037979f7dc4b..67b605e3dd16 100644
--- a/Documentation/ABI/stable/sysfs-class-rfkill
+++ b/Documentation/ABI/stable/sysfs-class-rfkill
@@ -16,7 +16,7 @@ Description: The rfkill class subsystem folder.
What: /sys/class/rfkill/rfkill[0-9]+/name
Date: 09-Jul-2007
-KernelVersion v2.6.22
+KernelVersion: v2.6.22
Contact: linux-wireless@vger.kernel.org
Description: Name assigned by driver to this key (interface or driver name).
Values: arbitrary string.
@@ -24,7 +24,7 @@ Values: arbitrary string.
What: /sys/class/rfkill/rfkill[0-9]+/type
Date: 09-Jul-2007
-KernelVersion v2.6.22
+KernelVersion: v2.6.22
Contact: linux-wireless@vger.kernel.org
Description: Driver type string ("wlan", "bluetooth", etc).
Values: See include/linux/rfkill.h.
@@ -32,7 +32,7 @@ Values: See include/linux/rfkill.h.
What: /sys/class/rfkill/rfkill[0-9]+/persistent
Date: 09-Jul-2007
-KernelVersion v2.6.22
+KernelVersion: v2.6.22
Contact: linux-wireless@vger.kernel.org
Description: Whether the soft blocked state is initialised from non-volatile
storage at startup.
@@ -44,7 +44,7 @@ Values: A numeric value:
What: /sys/class/rfkill/rfkill[0-9]+/state
Date: 09-Jul-2007
-KernelVersion v2.6.22
+KernelVersion: v2.6.22
Contact: linux-wireless@vger.kernel.org
Description: Current state of the transmitter.
This file was scheduled to be removed in 2014, but due to its
@@ -67,7 +67,7 @@ Values: A numeric value.
What: /sys/class/rfkill/rfkill[0-9]+/hard
Date: 12-March-2010
-KernelVersion v2.6.34
+KernelVersion: v2.6.34
Contact: linux-wireless@vger.kernel.org
Description: Current hardblock state. This file is read only.
Values: A numeric value.
@@ -81,7 +81,7 @@ Values: A numeric value.
What: /sys/class/rfkill/rfkill[0-9]+/soft
Date: 12-March-2010
-KernelVersion v2.6.34
+KernelVersion: v2.6.34
Contact: linux-wireless@vger.kernel.org
Description: Current softblock state. This file is read and write.
Values: A numeric value.
diff --git a/Documentation/ABI/stable/sysfs-devices-node b/Documentation/ABI/stable/sysfs-devices-node
index 402af4b2b905..2d0e023f22a7 100644
--- a/Documentation/ABI/stable/sysfs-devices-node
+++ b/Documentation/ABI/stable/sysfs-devices-node
@@ -177,6 +177,12 @@ Description:
The cache write policy: 0 for write-back, 1 for write-through,
other or unknown.
+What: /sys/devices/system/node/nodeX/memory_side_cache/indexY/address_mode
+Date: March 2025
+Contact: Dave Jiang <dave.jiang@intel.com>
+Description:
+ The address mode: 0 for reserved, 1 for extended-linear.
+
What: /sys/devices/system/node/nodeX/x86/sgx_total_bytes
Date: November 2021
Contact: Jarkko Sakkinen <jarkko@kernel.org>
@@ -221,3 +227,12 @@ Contact: Jiaqi Yan <jiaqiyan@google.com>
Description:
Of the raw poisoned pages on a NUMA node, how many pages are
recovered by memory error recovery attempt.
+
+What: /sys/devices/system/node/nodeX/reclaim
+Date: June 2025
+Contact: Linux Memory Management list <linux-mm@kvack.org>
+Description:
+ Perform user-triggered proactive reclaim on a NUMA node.
+ This interface is equivalent to the memcg variant.
+
+ See Documentation/admin-guide/cgroup-v2.rst
diff --git a/Documentation/ABI/stable/sysfs-devices-system-cpu b/Documentation/ABI/stable/sysfs-devices-system-cpu
index 902392d7eddf..cf78bd99f6c8 100644
--- a/Documentation/ABI/stable/sysfs-devices-system-cpu
+++ b/Documentation/ABI/stable/sysfs-devices-system-cpu
@@ -24,12 +24,6 @@ Description: Default value for the Data Stream Control Register (DSCR) on
If set by a process it will be inherited by child processes.
Values: 64 bit unsigned integer (bit field)
-What: /sys/devices/system/cpu/cpuX/topology/physical_package_id
-Description: physical package id of cpuX. Typically corresponds to a physical
- socket number, but the actual value is architecture and platform
- dependent.
-Values: integer
-
What: /sys/devices/system/cpu/cpuX/topology/die_id
Description: the CPU die ID of cpuX. Typically it is the hardware platform's
identifier (rather than the kernel's). The actual value is
@@ -86,10 +80,6 @@ What: /sys/devices/system/cpu/cpuX/topology/die_cpus
Description: internal kernel map of CPUs within the same die.
Values: hexadecimal bitmask.
-What: /sys/devices/system/cpu/cpuX/topology/ppin
-Description: per-socket protected processor inventory number
-Values: hexadecimal.
-
What: /sys/devices/system/cpu/cpuX/topology/die_cpus_list
Description: human-readable list of CPUs within the same die.
The format is like 0-3, 8-11, 14,17.
diff --git a/Documentation/ABI/stable/sysfs-driver-dma-idxd b/Documentation/ABI/stable/sysfs-driver-dma-idxd
index f2ec42949a54..4a355e6747ae 100644
--- a/Documentation/ABI/stable/sysfs-driver-dma-idxd
+++ b/Documentation/ABI/stable/sysfs-driver-dma-idxd
@@ -246,14 +246,14 @@ Description: Controls whether PRS disable is turned on for the workqueue.
capability.
What: /sys/bus/dsa/devices/wq<m>.<n>/occupancy
-Date May 25, 2021
+Date: May 25, 2021
KernelVersion: 5.14.0
Contact: dmaengine@vger.kernel.org
Description: Show the current number of entries in this WQ if WQ Occupancy
Support bit WQ capabilities is 1.
What: /sys/bus/dsa/devices/wq<m>.<n>/enqcmds_retries
-Date Oct 29, 2021
+Date: Oct 29, 2021
KernelVersion: 5.17.0
Contact: dmaengine@vger.kernel.org
Description: Indicate the number of retires for an enqcmds submission on a sharedwq.
diff --git a/Documentation/ABI/stable/sysfs-driver-misc-cp500 b/Documentation/ABI/stable/sysfs-driver-misc-cp500
new file mode 100644
index 000000000000..525bd18a2db4
--- /dev/null
+++ b/Documentation/ABI/stable/sysfs-driver-misc-cp500
@@ -0,0 +1,25 @@
+What: /sys/devices/pciXXXX:XX/0000:XX:XX.X/0000:XX:XX.X/version
+Date: June 2024
+KernelVersion: 6.11
+Contact: Gerhard Engleder <eg@keba.com>
+Description: Version of the FPGA configuration bitstream as printable string.
+ This file is read only.
+Users: KEBA
+
+What: /sys/devices/pciXXXX:XX/0000:XX:XX.X/0000:XX:XX.X/keep_cfg
+Date: June 2024
+KernelVersion: 6.11
+Contact: Gerhard Engleder <eg@keba.com>
+Description: Flag which signals if FPGA shall keep or reload configuration
+ bitstream on reset. Normal FPGA behavior and default is to keep
+ configuration bitstream and to only reset the configured logic.
+
+ Reloading configuration on reset enables an update of the
+ configuration bitstream with a simple reboot. Otherwise it is
+ necessary to power cycle the device to reload the new
+ configuration bitstream.
+
+ This file is read/write. The values are as follows:
+ 1 = keep configuration bitstream on reset, default
+ 0 = reload configuration bitstream on reset
+Users: KEBA
diff --git a/Documentation/ABI/stable/sysfs-driver-mlxreg-io b/Documentation/ABI/stable/sysfs-driver-mlxreg-io
index 2cdfd09123da..f59461111221 100644
--- a/Documentation/ABI/stable/sysfs-driver-mlxreg-io
+++ b/Documentation/ABI/stable/sysfs-driver-mlxreg-io
@@ -715,3 +715,101 @@ Description: This file shows 1 in case the system reset happened due to the
switch board.
The file is read only.
+
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/global_wp_request
+Date: May 2025
+KernelVersion: 6.16
+Contact: Vadim Pasternak <vadimp@nvidia.com>
+Description: This file when written 1 activates request to allow access to
+ the write protected flashes. Such request can be performed only
+ for system equipped with BMC (Board Management Controller),
+ which can grant access to protected flashes. In case BMC allows
+ access - it will respond with "global_wp_response". BMC decides
+ regarding time window of granted access. After granted window is
+ expired, BMC will change value back to 0.
+ Default value is 0.
+
+ The file is read/write.
+
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/global_wp_response
+Date: May 2025
+KernelVersion: 6.16
+Contact: Vadim Pasternak <vadimp@nvidia.com>
+Description: This file, when set 1, indicates that access to protected
+ flashes have been granted to host CPU by BMC.
+ Default value is 0.
+
+ The file is read only.
+
+What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/shutdown_unlock
+Date: May 2025
+KernelVersion: 6.16
+Contact: Vadim Pasternak <vadimp@nvidia.com>
+Description: When ASICs are getting overheated, system protection
+ hardware mechanism enforces system reboot. After system
+ reboot ASICs come up in locked state. To unlock ASICs,
+ this file should be written 1
+ Default value is 0.
+
+ The file is read/write.
+
+What: /sys/devices/platform/mlxplat/i2c_mlxcpld.*/i2c-*/i2c-*/*-00**/mlxreg-io.*/hwmon/hwmon*/boot_progress
+Date: May 2025
+KernelVersion: 6.16
+Contact: Vadim Pasternak <vadimp@nvidia.com>
+Description: These files show the Data Process Unit board boot progress
+ state. Valid states are:
+ - 4 : OS starting.
+ - 5 : OS running.
+ - 6 : Low-Power Standby.
+
+ The file is read only.
+
+What: /sys/devices/platform/mlxplat/i2c_mlxcpld.*/i2c-*/i2c-*/*-00**/mlxreg-io.*/hwmon/hwmon*/dpu_id
+Date: May 2025
+KernelVersion: 6.16
+Contact: Vadim Pasternak <vadimp@nvidia.com>
+Description: This file shows hardware Id of Data Process Unit board.
+
+ The file is read only.
+
+What: /sys/devices/platform/mlxplat/i2c_mlxcpld.*/i2c-*/i2c-*/*-00**/mlxreg-io.*/hwmon/hwmon*/reset_aux_pwr_or_reload
+What: /sys/devices/platform/mlxplat/i2c_mlxcpld.*/i2c-*/i2c-*/*-00**/mlxreg-io.*/hwmon/hwmon*/reset_dpu_thermal
+What: /sys/devices/platform/mlxplat/i2c_mlxcpld.*/i2c-*/i2c-*/*-00**/mlxreg-io.*/hwmon/hwmon*/reset_from_main_board
+Date: May 2025
+KernelVersion: 6.16
+Contact: Vadim Pasternak <vadimp@nvidia.com>
+Description: These files expose the cause of the most recent reset of the Data
+ Processing Unit (DPU) board. The possible causes are:
+ - Power auxiliary outage or power reload.
+ - Thermal shutdown.
+ - Reset request from the main board.
+ Value 1 in file means this is reset cause, 0 - otherwise. Only one of
+ the above causes could be 1 at the same time, representing only last
+ reset cause.
+
+ The files are read only.
+
+What: /sys/devices/platform/mlxplat/i2c_mlxcpld.*/i2c-*/i2c-*/*-00**/mlxreg-io.*/hwmon/hwmon*/perst_rst
+What: /sys/devices/platform/mlxplat/i2c_mlxcpld.*/i2c-*/i2c-*/*-00**/mlxreg-io.*/hwmon/hwmon*/phy_rst
+What: /sys/devices/platform/mlxplat/i2c_mlxcpld.*/i2c-*/i2c-*/*-00**/mlxreg-io.*/hwmon/hwmon*/tpm_rst
+What: /sys/devices/platform/mlxplat/i2c_mlxcpld.*/i2c-*/i2c-*/*-00**/mlxreg-io.*/hwmon/hwmon*/usbphy_rst
+Date: May 2025
+KernelVersion: 6.16
+Contact: Vadim Pasternak <vadimp@nvidia.com>
+Description: These files allow to reset hardware components of Data Process
+ Unit board. Respectively PCI, Ethernet PHY, TPM and USB PHY
+ resets.
+ Default values for all the attributes is 1. Writing 0 will
+ cause reset of the related component.
+
+ The files are read/write.
+
+What: /sys/devices/platform/mlxplat/i2c_mlxcpld.*/i2c-*/i2c-*/*-00**/mlxreg-io.*/hwmon/hwmon*/ufm_upgrade
+Date: May 2025
+KernelVersion: 6.16
+Contact: Vadim Pasternak <vadimp@nvidia.com>
+Description: These files show status of Unified Fabric Manager upgrade.
+ state. 0 - means upgrade is done, 1 - otherwise.
+
+ The file is read only.
diff --git a/Documentation/ABI/stable/sysfs-driver-qaic b/Documentation/ABI/stable/sysfs-driver-qaic
new file mode 100644
index 000000000000..c767a93342b3
--- /dev/null
+++ b/Documentation/ABI/stable/sysfs-driver-qaic
@@ -0,0 +1,19 @@
+What: /sys/bus/pci/drivers/qaic/XXXX:XX:XX.X/accel/accel<minor_nr>/dbc<N>_state
+Date: October 2025
+KernelVersion: 6.19
+Contact: Jeff Hugo <jeff.hugo@oss.qualcomm.com>
+Description: Represents the current state of DMA Bridge channel (DBC). Below are the possible
+ states:
+
+ =================== ==========================================================
+ IDLE (0) DBC is free and can be activated
+ ASSIGNED (1) DBC is activated and a workload is running on device
+ BEFORE_SHUTDOWN (2) Sub-system associated with this workload has crashed and
+ it will shutdown soon
+ AFTER_SHUTDOWN (3) Sub-system associated with this workload has crashed and
+ it has shutdown
+ BEFORE_POWER_UP (4) Sub-system associated with this workload is shutdown and
+ it will be powered up soon
+ AFTER_POWER_UP (5) Sub-system associated with this workload is now powered up
+ =================== ==========================================================
+Users: Any userspace application or clients interested in DBC state.
diff --git a/Documentation/ABI/stable/sysfs-firmware-efi-vars b/Documentation/ABI/stable/sysfs-firmware-efi-vars
deleted file mode 100644
index 46ccd233e359..000000000000
--- a/Documentation/ABI/stable/sysfs-firmware-efi-vars
+++ /dev/null
@@ -1,79 +0,0 @@
-What: /sys/firmware/efi/vars
-Date: April 2004
-Contact: Matt Domsch <Matt_Domsch@dell.com>
-Description:
- This directory exposes interfaces for interactive with
- EFI variables. For more information on EFI variables,
- see 'Variable Services' in the UEFI specification
- (section 7.2 in specification version 2.3 Errata D).
-
- In summary, EFI variables are named, and are classified
- into separate namespaces through the use of a vendor
- GUID. They also have an arbitrary binary value
- associated with them.
-
- The efivars module enumerates these variables and
- creates a separate directory for each one found. Each
- directory has a name of the form "<key>-<vendor guid>"
- and contains the following files:
-
- =============== ========================================
- attributes: A read-only text file enumerating the
- EFI variable flags. Potential values
- include:
-
- EFI_VARIABLE_NON_VOLATILE
- EFI_VARIABLE_BOOTSERVICE_ACCESS
- EFI_VARIABLE_RUNTIME_ACCESS
- EFI_VARIABLE_HARDWARE_ERROR_RECORD
- EFI_VARIABLE_AUTHENTICATED_WRITE_ACCESS
-
- See the EFI documentation for an
- explanation of each of these variables.
-
- data: A read-only binary file that can be read
- to attain the value of the EFI variable
-
- guid: The vendor GUID of the variable. This
- should always match the GUID in the
- variable's name.
-
- raw_var: A binary file that can be read to obtain
- a structure that contains everything
- there is to know about the variable.
- For structure definition see "struct
- efi_variable" in the kernel sources.
-
- This file can also be written to in
- order to update the value of a variable.
- For this to work however, all fields of
- the "struct efi_variable" passed must
- match byte for byte with the structure
- read out of the file, save for the value
- portion.
-
- **Note** the efi_variable structure
- read/written with this file contains a
- 'long' type that may change widths
- depending on your underlying
- architecture.
-
- size: As ASCII representation of the size of
- the variable's value.
- =============== ========================================
-
-
- In addition, two other magic binary files are provided
- in the top-level directory and are used for adding and
- removing variables:
-
- =============== ========================================
- new_var: Takes a "struct efi_variable" and
- instructs the EFI firmware to create a
- new variable.
-
- del_var: Takes a "struct efi_variable" and
- instructs the EFI firmware to remove any
- variable that has a matching vendor GUID
- and variable key name.
- =============== ========================================
diff --git a/Documentation/ABI/stable/sysfs-kernel-time-aux-clocks b/Documentation/ABI/stable/sysfs-kernel-time-aux-clocks
new file mode 100644
index 000000000000..825508f42af6
--- /dev/null
+++ b/Documentation/ABI/stable/sysfs-kernel-time-aux-clocks
@@ -0,0 +1,5 @@
+What: /sys/kernel/time/aux_clocks/<ID>/enable
+Date: May 2025
+Contact: Thomas Gleixner <tglx@linutronix.de>
+Description:
+ Controls the enablement of auxiliary clock timekeepers.
diff --git a/Documentation/ABI/stable/vdso b/Documentation/ABI/stable/vdso
index 951838d42781..85dbb6a160df 100644
--- a/Documentation/ABI/stable/vdso
+++ b/Documentation/ABI/stable/vdso
@@ -9,9 +9,11 @@ maps an ELF DSO into that program's address space. This DSO is called
the vDSO and it often contains useful and highly-optimized alternatives
to real syscalls.
-These functions are called just like ordinary C function according to
-your platform's ABI. Call them from a sensible context. (For example,
-if you set CS on x86 to something strange, the vDSO functions are
+These functions are called according to your platform's ABI. On many
+platforms they are called just like ordinary C function. On other platforms
+(ex: powerpc) they are called with the same convention as system calls which
+is different from ordinary C functions. Call them from a sensible context.
+(For example, if you set CS on x86 to something strange, the vDSO functions are
within their rights to crash.) In addition, if you pass a bad
pointer to a vDSO function, you might get SIGSEGV instead of -EFAULT.
diff --git a/Documentation/ABI/testing/configfs-tsm b/Documentation/ABI/testing/configfs-tsm
deleted file mode 100644
index dd24202b5ba5..000000000000
--- a/Documentation/ABI/testing/configfs-tsm
+++ /dev/null
@@ -1,82 +0,0 @@
-What: /sys/kernel/config/tsm/report/$name/inblob
-Date: September, 2023
-KernelVersion: v6.7
-Contact: linux-coco@lists.linux.dev
-Description:
- (WO) Up to 64 bytes of user specified binary data. For replay
- protection this should include a nonce, but the kernel does not
- place any restrictions on the content.
-
-What: /sys/kernel/config/tsm/report/$name/outblob
-Date: September, 2023
-KernelVersion: v6.7
-Contact: linux-coco@lists.linux.dev
-Description:
- (RO) Binary attestation report generated from @inblob and other
- options The format of the report is implementation specific
- where the implementation is conveyed via the @provider
- attribute.
-
-What: /sys/kernel/config/tsm/report/$name/auxblob
-Date: October, 2023
-KernelVersion: v6.7
-Contact: linux-coco@lists.linux.dev
-Description:
- (RO) Optional supplemental data that a TSM may emit, visibility
- of this attribute depends on TSM, and may be empty if no
- auxiliary data is available.
-
- When @provider is "sev_guest" this file contains the
- "cert_table" from SEV-ES Guest-Hypervisor Communication Block
- Standardization v2.03 Section 4.1.8.1 MSG_REPORT_REQ.
- https://www.amd.com/content/dam/amd/en/documents/epyc-technical-docs/specifications/56421.pdf
-
-What: /sys/kernel/config/tsm/report/$name/provider
-Date: September, 2023
-KernelVersion: v6.7
-Contact: linux-coco@lists.linux.dev
-Description:
- (RO) A name for the format-specification of @outblob like
- "sev_guest" [1] or "tdx_guest" [2] in the near term, or a
- common standard format in the future.
-
- [1]: SEV Secure Nested Paging Firmware ABI Specification
- Revision 1.55 Table 22
- https://www.amd.com/content/dam/amd/en/documents/epyc-technical-docs/specifications/56860.pdf
-
- [2]: Intel® Trust Domain Extensions Data Center Attestation
- Primitives : Quote Generation Library and Quote Verification
- Library Revision 0.8 Appendix 4,5
- https://download.01.org/intel-sgx/latest/dcap-latest/linux/docs/Intel_TDX_DCAP_Quoting_Library_API.pdf
-
-What: /sys/kernel/config/tsm/report/$name/generation
-Date: September, 2023
-KernelVersion: v6.7
-Contact: linux-coco@lists.linux.dev
-Description:
- (RO) The value in this attribute increments each time @inblob or
- any option is written. Userspace can detect conflicts by
- checking generation before writing to any attribute and making
- sure the number of writes matches expectations after reading
- @outblob, or it can prevent conflicts by creating a report
- instance per requesting context.
-
-What: /sys/kernel/config/tsm/report/$name/privlevel
-Date: September, 2023
-KernelVersion: v6.7
-Contact: linux-coco@lists.linux.dev
-Description:
- (WO) Attribute is visible if a TSM implementation provider
- supports the concept of attestation reports for TVMs running at
- different privilege levels, like SEV-SNP "VMPL", specify the
- privilege level via this attribute. The minimum acceptable
- value is conveyed via @privlevel_floor and the maximum
- acceptable value is TSM_PRIVLEVEL_MAX (3).
-
-What: /sys/kernel/config/tsm/report/$name/privlevel_floor
-Date: September, 2023
-KernelVersion: v6.7
-Contact: linux-coco@lists.linux.dev
-Description:
- (RO) Indicates the minimum permissible value that can be written
- to @privlevel.
diff --git a/Documentation/ABI/testing/configfs-tsm-report b/Documentation/ABI/testing/configfs-tsm-report
new file mode 100644
index 000000000000..534408bc1408
--- /dev/null
+++ b/Documentation/ABI/testing/configfs-tsm-report
@@ -0,0 +1,145 @@
+What: /sys/kernel/config/tsm/report/$name/inblob
+Date: September, 2023
+KernelVersion: v6.7
+Contact: linux-coco@lists.linux.dev
+Description:
+ (WO) Up to 64 bytes of user specified binary data. For replay
+ protection this should include a nonce, but the kernel does not
+ place any restrictions on the content.
+
+What: /sys/kernel/config/tsm/report/$name/outblob
+Date: September, 2023
+KernelVersion: v6.7
+Contact: linux-coco@lists.linux.dev
+Description:
+ (RO) Binary attestation report generated from @inblob and other
+ options The format of the report is implementation specific
+ where the implementation is conveyed via the @provider
+ attribute.
+
+What: /sys/kernel/config/tsm/report/$name/auxblob
+Date: October, 2023
+KernelVersion: v6.7
+Contact: linux-coco@lists.linux.dev
+Description:
+ (RO) Optional supplemental data that a TSM may emit, visibility
+ of this attribute depends on TSM, and may be empty if no
+ auxiliary data is available.
+
+ When @provider is "sev_guest" this file contains the
+ "cert_table" from SEV-ES Guest-Hypervisor Communication Block
+ Standardization v2.03 Section 4.1.8.1 MSG_REPORT_REQ.
+ https://www.amd.com/content/dam/amd/en/documents/epyc-technical-docs/specifications/56421.pdf
+
+What: /sys/kernel/config/tsm/report/$name/manifestblob
+Date: January, 2024
+KernelVersion: v6.10
+Contact: linux-coco@lists.linux.dev
+Description:
+ (RO) Optional supplemental data that a TSM may emit, visibility
+ of this attribute depends on TSM, and may be empty if no
+ manifest data is available.
+
+ See 'service_provider' for information on the format of the
+ manifest blob.
+
+What: /sys/kernel/config/tsm/report/$name/provider
+Date: September, 2023
+KernelVersion: v6.7
+Contact: linux-coco@lists.linux.dev
+Description:
+ (RO) A name for the format-specification of @outblob like
+ "sev_guest" [1] or "tdx_guest" [2] in the near term, or a
+ common standard format in the future.
+
+ [1]: SEV Secure Nested Paging Firmware ABI Specification
+ Revision 1.55 Table 22
+ https://www.amd.com/content/dam/amd/en/documents/epyc-technical-docs/specifications/56860.pdf
+
+ [2]: Intel® Trust Domain Extensions Data Center Attestation
+ Primitives : Quote Generation Library and Quote Verification
+ Library Revision 0.8 Appendix 4,5
+ https://download.01.org/intel-sgx/latest/dcap-latest/linux/docs/Intel_TDX_DCAP_Quoting_Library_API.pdf
+
+What: /sys/kernel/config/tsm/report/$name/generation
+Date: September, 2023
+KernelVersion: v6.7
+Contact: linux-coco@lists.linux.dev
+Description:
+ (RO) The value in this attribute increments each time @inblob or
+ any option is written. Userspace can detect conflicts by
+ checking generation before writing to any attribute and making
+ sure the number of writes matches expectations after reading
+ @outblob, or it can prevent conflicts by creating a report
+ instance per requesting context.
+
+What: /sys/kernel/config/tsm/report/$name/privlevel
+Date: September, 2023
+KernelVersion: v6.7
+Contact: linux-coco@lists.linux.dev
+Description:
+ (WO) Attribute is visible if a TSM implementation provider
+ supports the concept of attestation reports for TVMs running at
+ different privilege levels, like SEV-SNP "VMPL", specify the
+ privilege level via this attribute. The minimum acceptable
+ value is conveyed via @privlevel_floor and the maximum
+ acceptable value is TSM_PRIVLEVEL_MAX (3).
+
+What: /sys/kernel/config/tsm/report/$name/privlevel_floor
+Date: September, 2023
+KernelVersion: v6.7
+Contact: linux-coco@lists.linux.dev
+Description:
+ (RO) Indicates the minimum permissible value that can be written
+ to @privlevel.
+
+What: /sys/kernel/config/tsm/report/$name/service_provider
+Date: January, 2024
+KernelVersion: v6.10
+Contact: linux-coco@lists.linux.dev
+Description:
+ (WO) Attribute is visible if a TSM implementation provider
+ supports the concept of attestation reports from a service
+ provider for TVMs, like SEV-SNP running under an SVSM.
+ Specifying the service provider via this attribute will create
+ an attestation report as specified by the service provider.
+ The only currently supported service provider is "svsm".
+
+ For the "svsm" service provider, see the Secure VM Service Module
+ for SEV-SNP Guests v1.00 Section 7. For the doc, search for
+ "site:amd.com "Secure VM Service Module for SEV-SNP
+ Guests", docID: 58019"
+
+What: /sys/kernel/config/tsm/report/$name/service_guid
+Date: January, 2024
+KernelVersion: v6.10
+Contact: linux-coco@lists.linux.dev
+Description:
+ (WO) Attribute is visible if a TSM implementation provider
+ supports the concept of attestation reports from a service
+ provider for TVMs, like SEV-SNP running under an SVSM.
+ Specifying an empty/null GUID (00000000-0000-0000-0000-000000)
+ requests all active services within the service provider be
+ part of the attestation report. Specifying a GUID request
+ an attestation report of just the specified service using the
+ manifest form specified by the service_manifest_version
+ attribute.
+
+ See 'service_provider' for information on the format of the
+ service guid.
+
+What: /sys/kernel/config/tsm/report/$name/service_manifest_version
+Date: January, 2024
+KernelVersion: v6.10
+Contact: linux-coco@lists.linux.dev
+Description:
+ (WO) Attribute is visible if a TSM implementation provider
+ supports the concept of attestation reports from a service
+ provider for TVMs, like SEV-SNP running under an SVSM.
+ Indicates the service manifest version requested for the
+ attestation report (default 0). If this field is not set by
+ the user, the default manifest version of the service (the
+ service's initial/first manifest version) is returned.
+
+ See 'service_provider' for information on the format of the
+ service manifest version.
diff --git a/Documentation/ABI/testing/configfs-usb-gadget-acm b/Documentation/ABI/testing/configfs-usb-gadget-acm
index d21092d75a05..25e68be9eb66 100644
--- a/Documentation/ABI/testing/configfs-usb-gadget-acm
+++ b/Documentation/ABI/testing/configfs-usb-gadget-acm
@@ -6,3 +6,10 @@ Description:
This item contains just one readonly attribute: port_num.
It contains the port number of the /dev/ttyGS<n> device
associated with acm function's instance "name".
+
+What: /config/usb-gadget/gadget/functions/acm.name/protocol
+Date: Aug 2024
+KernelVersion: 6.13
+Description:
+ Reported bInterfaceProtocol for the ACM device. For legacy
+ reasons, this defaults to 1 (USB_CDC_ACM_PROTO_AT_V25TER).
diff --git a/Documentation/ABI/testing/configfs-usb-gadget-ffs b/Documentation/ABI/testing/configfs-usb-gadget-ffs
index e39b27653c65..bf8936ff6d38 100644
--- a/Documentation/ABI/testing/configfs-usb-gadget-ffs
+++ b/Documentation/ABI/testing/configfs-usb-gadget-ffs
@@ -4,6 +4,14 @@ KernelVersion: 3.13
Description: The purpose of this directory is to create and remove it.
A corresponding USB function instance is created/removed.
- There are no attributes here.
- All parameters are set through FunctionFS.
+ All attributes are read only:
+
+ ============= ============================================
+ ready 1 if the function is ready to be used, E.G.
+ if userspace has written descriptors and
+ strings to ep0, so the gadget can be
+ enabled - 0 otherwise.
+ ============= ============================================
+
+ All other parameters are set through FunctionFS.
diff --git a/Documentation/ABI/testing/configfs-usb-gadget-midi2 b/Documentation/ABI/testing/configfs-usb-gadget-midi2
index 0eac3aaba137..d76a52e2ca7f 100644
--- a/Documentation/ABI/testing/configfs-usb-gadget-midi2
+++ b/Documentation/ABI/testing/configfs-usb-gadget-midi2
@@ -47,7 +47,7 @@ Description:
midi1_first_group The first UMP Group number for MIDI 1.0 (0-15)
midi1_num_groups The number of groups for MIDI 1.0 (0-16)
ui_hint 0: unknown, 1: receiver, 2: sender, 3: both
- midi_ci_verison Supported MIDI-CI version number (8 bit)
+ midi_ci_version Supported MIDI-CI version number (8 bit)
is_midi1 Legacy MIDI 1.0 device (0, 1 or 2)
sysex8_streams Max number of SysEx8 streams (8 bit)
active Active FB flag (0 or 1)
diff --git a/Documentation/ABI/testing/configfs-usb-gadget-uac1 b/Documentation/ABI/testing/configfs-usb-gadget-uac1
index c4ba92f004c3..64188a85592b 100644
--- a/Documentation/ABI/testing/configfs-usb-gadget-uac1
+++ b/Documentation/ABI/testing/configfs-usb-gadget-uac1
@@ -30,4 +30,12 @@ Description:
req_number the number of pre-allocated requests
for both capture and playback
function_name name of the interface
+ p_it_name playback input terminal name
+ p_it_ch_name playback channels name
+ p_ot_name playback output terminal name
+ p_fu_vol_name playback mute/volume functional unit name
+ c_it_name capture input terminal name
+ c_it_ch_name capture channels name
+ c_ot_name capture output terminal name
+ c_fu_vol_name capture mute/volume functional unit name
===================== =======================================
diff --git a/Documentation/ABI/testing/configfs-usb-gadget-uac2 b/Documentation/ABI/testing/configfs-usb-gadget-uac2
index a2bf4fd82a5b..133e995c3e92 100644
--- a/Documentation/ABI/testing/configfs-usb-gadget-uac2
+++ b/Documentation/ABI/testing/configfs-usb-gadget-uac2
@@ -35,6 +35,17 @@ Description:
req_number the number of pre-allocated requests
for both capture and playback
function_name name of the interface
+ if_ctrl_name topology control name
+ clksrc_in_name input clock name
+ clksrc_out_name output clock name
+ p_it_name playback input terminal name
+ p_it_ch_name playback input first channel name
+ p_ot_name playback output terminal name
+ p_fu_vol_name playback mute/volume function unit name
+ c_it_name capture input terminal name
+ c_it_ch_name capture input first channel name
+ c_ot_name capture output terminal name
+ c_fu_vol_name capture mute/volume functional unit name
c_terminal_type code of the capture terminal type
p_terminal_type code of the playback terminal type
===================== =======================================
diff --git a/Documentation/ABI/testing/configfs-usb-gadget-uvc b/Documentation/ABI/testing/configfs-usb-gadget-uvc
index 4feb692c4c1d..b6720768d63d 100644
--- a/Documentation/ABI/testing/configfs-usb-gadget-uvc
+++ b/Documentation/ABI/testing/configfs-usb-gadget-uvc
@@ -342,6 +342,70 @@ Description: Specific uncompressed frame descriptors
support
========================= =====================================
+What: /config/usb-gadget/gadget/functions/uvc.name/streaming/framebased
+Date: Sept 2024
+KernelVersion: 5.15
+Description: Framebased format descriptors
+
+What: /config/usb-gadget/gadget/functions/uvc.name/streaming/framebased/name
+Date: Sept 2024
+KernelVersion: 5.15
+Description: Specific framebased format descriptors
+
+ ================== =======================================
+ bFormatIndex unique id for this format descriptor;
+ only defined after parent header is
+ linked into the streaming class;
+ read-only
+ bmaControls this format's data for bmaControls in
+ the streaming header
+ bmInterlaceFlags specifies interlace information,
+ read-only
+ bAspectRatioY the X dimension of the picture aspect
+ ratio, read-only
+ bAspectRatioX the Y dimension of the picture aspect
+ ratio, read-only
+ bDefaultFrameIndex optimum frame index for this stream
+ bBitsPerPixel number of bits per pixel used to
+ specify color in the decoded video
+ frame
+ guidFormat globally unique id used to identify
+ stream-encoding format
+ ================== =======================================
+
+What: /config/usb-gadget/gadget/functions/uvc.name/streaming/framebased/name/name
+Date: Sept 2024
+KernelVersion: 5.15
+Description: Specific framebased frame descriptors
+
+ ========================= =====================================
+ bFrameIndex unique id for this framedescriptor;
+ only defined after parent format is
+ linked into the streaming header;
+ read-only
+ dwFrameInterval indicates how frame interval can be
+ programmed; a number of values
+ separated by newline can be specified
+ dwDefaultFrameInterval the frame interval the device would
+ like to use as default
+ dwBytesPerLine Specifies the number of bytes per line
+ of video for packed fixed frame size
+ formats, allowing the receiver to
+ perform stride alignment of the video.
+ If the bVariableSize value (above) is
+ TRUE (1), or if the format does not
+ permit such alignment, this value shall
+ be set to zero (0).
+ dwMaxBitRate the maximum bit rate at the shortest
+ frame interval in bps
+ dwMinBitRate the minimum bit rate at the longest
+ frame interval in bps
+ wHeight height of decoded bitmap frame in px
+ wWidth width of decoded bitmam frame in px
+ bmCapabilities still image support, fixed frame-rate
+ support
+ ========================= =====================================
+
What: /config/usb-gadget/gadget/functions/uvc.name/streaming/header
Date: Dec 2014
KernelVersion: 4.0
diff --git a/Documentation/ABI/testing/debugfs-alienware-wmi b/Documentation/ABI/testing/debugfs-alienware-wmi
new file mode 100644
index 000000000000..c7f525d6baac
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-alienware-wmi
@@ -0,0 +1,64 @@
+What: /sys/kernel/debug/alienware-wmi-<wmi_device_name>/system_description
+Date: March 2025
+KernelVersion: 6.15
+Contact: Kurt Borja <kuurtb@gmail.com>
+Description:
+ This file exposes the raw ``system_description`` number reported
+ by the WMAX device.
+
+ Only present on devices with the AWCC interface.
+
+ See Documentation/admin-guide/laptops/alienware-wmi.rst for
+ details.
+
+ RO
+
+What: /sys/kernel/debug/alienware-wmi-<wmi_device_name>/hwmon_data
+Date: March 2025
+KernelVersion: 6.15
+Contact: Kurt Borja <kuurtb@gmail.com>
+Description:
+ This file exposes HWMON private data.
+
+ Includes fan sensor count, temperature sensor count, internal
+ fan IDs and internal temp IDs.
+
+ See Documentation/admin-guide/laptops/alienware-wmi.rst for
+ details.
+
+ RO
+
+What: /sys/kernel/debug/alienware-wmi-<wmi_device_name>/pprof_data
+Date: March 2025
+KernelVersion: 6.15
+Contact: Kurt Borja <kuurtb@gmail.com>
+Description:
+ This file exposes Platform Profile private data.
+
+ Includes internal mapping to platform profiles and thermal
+ profile IDs.
+
+ See Documentation/admin-guide/laptops/alienware-wmi.rst for
+ details.
+
+ RO
+
+What: /sys/kernel/debug/alienware-wmi-<wmi_device_name>/gpio_ctl/total_gpios
+Date: May 2025
+KernelVersion: 6.16
+Contact: Kurt Borja <kuurtb@gmail.com>
+Description:
+ Total number of GPIO pins reported by the device.
+
+ RO
+
+What: /sys/kernel/debug/alienware-wmi-<wmi_device_name>/gpio_ctl/pinX
+Date: May 2025
+KernelVersion: 6.16
+Contact: Kurt Borja <kuurtb@gmail.com>
+Description:
+ This file controls GPIO pin X status.
+
+ See Documentation/wmi/devices/alienware-wmi.rst for details.
+
+ RW
diff --git a/Documentation/ABI/testing/debugfs-amd-iommu b/Documentation/ABI/testing/debugfs-amd-iommu
new file mode 100644
index 000000000000..5621a66aa693
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-amd-iommu
@@ -0,0 +1,131 @@
+What: /sys/kernel/debug/iommu/amd/iommu<x>/mmio
+Date: January 2025
+Contact: Dheeraj Kumar Srivastava <dheerajkumar.srivastava@amd.com>
+Description:
+ This file provides read/write access for user input. Users specify the
+ MMIO register offset for iommu<x>, and the file outputs the corresponding
+ MMIO register value of iommu<x>
+
+ Example::
+
+ $ echo "0x18" > /sys/kernel/debug/iommu/amd/iommu00/mmio
+ $ cat /sys/kernel/debug/iommu/amd/iommu00/mmio
+
+ Output::
+
+ Offset:0x18 Value:0x000c22000003f48d
+
+What: /sys/kernel/debug/iommu/amd/iommu<x>/capability
+Date: January 2025
+Contact: Dheeraj Kumar Srivastava <dheerajkumar.srivastava@amd.com>
+Description:
+ This file provides read/write access for user input. Users specify the
+ capability register offset for iommu<x>, and the file outputs the
+ corresponding capability register value of iommu<x>.
+
+ Example::
+
+ $ echo "0x10" > /sys/kernel/debug/iommu/amd/iommu00/capability
+ $ cat /sys/kernel/debug/iommu/amd/iommu00/capability
+
+ Output::
+
+ Offset:0x10 Value:0x00203040
+
+What: /sys/kernel/debug/iommu/amd/iommu<x>/cmdbuf
+Date: January 2025
+Contact: Dheeraj Kumar Srivastava <dheerajkumar.srivastava@amd.com>
+Description:
+ This file is a read-only output file containing iommu<x> command
+ buffer entries.
+
+ Examples::
+
+ $ cat /sys/kernel/debug/iommu/amd/iommu<x>/cmdbuf
+
+ Output::
+
+ CMD Buffer Head Offset:339 Tail Offset:339
+ 0: 00835001 10000001 00003c00 00000000
+ 1: 00000000 30000005 fffff003 7fffffff
+ 2: 00835001 10000001 00003c01 00000000
+ 3: 00000000 30000005 fffff003 7fffffff
+ 4: 00835001 10000001 00003c02 00000000
+ 5: 00000000 30000005 fffff003 7fffffff
+ 6: 00835001 10000001 00003c03 00000000
+ 7: 00000000 30000005 fffff003 7fffffff
+ 8: 00835001 10000001 00003c04 00000000
+ 9: 00000000 30000005 fffff003 7fffffff
+ 10: 00835001 10000001 00003c05 00000000
+ 11: 00000000 30000005 fffff003 7fffffff
+ [...]
+
+What: /sys/kernel/debug/iommu/amd/devid
+Date: January 2025
+Contact: Dheeraj Kumar Srivastava <dheerajkumar.srivastava@amd.com>
+Description:
+ This file provides read/write access for user input. Users specify the
+ device ID, which can be used to dump IOMMU data structures such as the
+ interrupt remapping table and device table.
+
+ Example:
+
+ 1.
+ ::
+
+ $ echo 0000:01:00.0 > /sys/kernel/debug/iommu/amd/devid
+ $ cat /sys/kernel/debug/iommu/amd/devid
+
+ Output::
+
+ 0000:01:00.0
+
+ 2.
+ ::
+
+ $ echo 01:00.0 > /sys/kernel/debug/iommu/amd/devid
+ $ cat /sys/kernel/debug/iommu/amd/devid
+
+ Output::
+
+ 0000:01:00.0
+
+What: /sys/kernel/debug/iommu/amd/devtbl
+Date: January 2025
+Contact: Dheeraj Kumar Srivastava <dheerajkumar.srivastava@amd.com>
+Description:
+ This file is a read-only output file containing the device table entry
+ for the device ID provided in /sys/kernel/debug/iommu/amd/devid.
+
+ Example::
+
+ $ cat /sys/kernel/debug/iommu/amd/devtbl
+
+ Output::
+
+ DeviceId QWORD[3] QWORD[2] QWORD[1] QWORD[0] iommu
+ 0000:01:00.0 0000000000000000 20000001373b8013 0000000000000038 6000000114d7b603 iommu3
+
+What: /sys/kernel/debug/iommu/amd/irqtbl
+Date: January 2025
+Contact: Dheeraj Kumar Srivastava <dheerajkumar.srivastava@amd.com>
+Description:
+ This file is a read-only output file containing valid IRT table entries
+ for the device ID provided in /sys/kernel/debug/iommu/amd/devid.
+
+ Example::
+
+ $ cat /sys/kernel/debug/iommu/amd/irqtbl
+
+ Output::
+
+ DeviceId 0000:01:00.0
+ IRT[0000] 0000000000000020 0000000000000241
+ IRT[0001] 0000000000000020 0000000000000841
+ IRT[0002] 0000000000000020 0000000000002041
+ IRT[0003] 0000000000000020 0000000000008041
+ IRT[0004] 0000000000000020 0000000000020041
+ IRT[0005] 0000000000000020 0000000000080041
+ IRT[0006] 0000000000000020 0000000000200041
+ IRT[0007] 0000000000000020 0000000000800041
+ [...]
diff --git a/Documentation/ABI/testing/debugfs-cec-error-inj b/Documentation/ABI/testing/debugfs-cec-error-inj
index 8debcb08a3b5..c512f71bba8e 100644
--- a/Documentation/ABI/testing/debugfs-cec-error-inj
+++ b/Documentation/ABI/testing/debugfs-cec-error-inj
@@ -1,6 +1,6 @@
What: /sys/kernel/debug/cec/*/error-inj
Date: March 2018
-Contact: Hans Verkuil <hverkuil-cisco@xs4all.nl>
+Contact: Hans Verkuil <hverkuil@kernel.org>
Description:
The CEC Framework allows for CEC error injection commands through
diff --git a/Documentation/ABI/testing/debugfs-cxl b/Documentation/ABI/testing/debugfs-cxl
index fe61d372e3fa..2989d4da96c1 100644
--- a/Documentation/ABI/testing/debugfs-cxl
+++ b/Documentation/ABI/testing/debugfs-cxl
@@ -14,12 +14,27 @@ Description:
event to its internal Informational Event log, updates the
Event Status register, and if configured, interrupts the host.
It is not an error to inject poison into an address that
- already has poison present and no error is returned. The
- inject_poison attribute is only visible for devices supporting
- the capability.
+ already has poison present and no error is returned. If the
+ device returns 'Inject Poison Limit Reached' an -EBUSY error
+ is returned to the user. The inject_poison attribute is only
+ visible for devices supporting the capability.
+ TEST-ONLY INTERFACE: This interface is intended for testing
+ and validation purposes only. It is not a data repair mechanism
+ and should never be used on production systems or live data.
-What: /sys/kernel/debug/memX/clear_poison
+ DATA LOSS RISK: For CXL persistent memory (PMEM) devices,
+ poison injection can result in permanent data loss. Injected
+ poison may render data permanently inaccessible even after
+ clearing, as the clear operation writes zeros and does not
+ recover original data.
+
+ SYSTEM STABILITY RISK: For volatile memory, poison injection
+ can cause kernel crashes, system instability, or unpredictable
+ behavior if the poisoned addresses are accessed by running code
+ or critical kernel structures.
+
+What: /sys/kernel/debug/cxl/memX/clear_poison
Date: April, 2023
KernelVersion: v6.4
Contact: linux-cxl@vger.kernel.org
@@ -33,3 +48,110 @@ Description:
device cannot clear poison from the address, -ENXIO is returned.
The clear_poison attribute is only visible for devices
supporting the capability.
+
+ TEST-ONLY INTERFACE: This interface is intended for testing
+ and validation purposes only. It is not a data repair mechanism
+ and should never be used on production systems or live data.
+
+ CLEAR IS NOT DATA RECOVERY: This operation writes zeros to the
+ specified address range and removes the address from the poison
+ list. It does NOT recover or restore original data that may have
+ been present before poison injection. Any original data at the
+ cleared address is permanently lost and replaced with zeros.
+
+ CLEAR IS NOT A REPAIR MECHANISM: This interface is for testing
+ purposes only and should not be used as a data repair tool.
+ Clearing poison is fundamentally different from data recovery
+ or error correction.
+
+What: /sys/kernel/debug/cxl/regionX/inject_poison
+Date: August, 2025
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (WO) When a Host Physical Address (HPA) is written to this
+ attribute, the region driver translates it to a Device
+ Physical Address (DPA) and identifies the corresponding
+ memdev. It then sends an inject poison command to that memdev
+ at the translated DPA. Refer to the memdev ABI entry at:
+ /sys/kernel/debug/cxl/memX/inject_poison for the detailed
+ behavior. This attribute is only visible if all memdevs
+ participating in the region support both inject and clear
+ poison commands.
+
+ TEST-ONLY INTERFACE: This interface is intended for testing
+ and validation purposes only. It is not a data repair mechanism
+ and should never be used on production systems or live data.
+
+ DATA LOSS RISK: For CXL persistent memory (PMEM) devices,
+ poison injection can result in permanent data loss. Injected
+ poison may render data permanently inaccessible even after
+ clearing, as the clear operation writes zeros and does not
+ recover original data.
+
+ SYSTEM STABILITY RISK: For volatile memory, poison injection
+ can cause kernel crashes, system instability, or unpredictable
+ behavior if the poisoned addresses are accessed by running code
+ or critical kernel structures.
+
+What: /sys/kernel/debug/cxl/regionX/clear_poison
+Date: August, 2025
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (WO) When a Host Physical Address (HPA) is written to this
+ attribute, the region driver translates it to a Device
+ Physical Address (DPA) and identifies the corresponding
+ memdev. It then sends a clear poison command to that memdev
+ at the translated DPA. Refer to the memdev ABI entry at:
+ /sys/kernel/debug/cxl/memX/clear_poison for the detailed
+ behavior. This attribute is only visible if all memdevs
+ participating in the region support both inject and clear
+ poison commands.
+
+ TEST-ONLY INTERFACE: This interface is intended for testing
+ and validation purposes only. It is not a data repair mechanism
+ and should never be used on production systems or live data.
+
+ CLEAR IS NOT DATA RECOVERY: This operation writes zeros to the
+ specified address range and removes the address from the poison
+ list. It does NOT recover or restore original data that may have
+ been present before poison injection. Any original data at the
+ cleared address is permanently lost and replaced with zeros.
+
+ CLEAR IS NOT A REPAIR MECHANISM: This interface is for testing
+ purposes only and should not be used as a data repair tool.
+ Clearing poison is fundamentally different from data recovery
+ or error correction.
+
+What: /sys/kernel/debug/cxl/einj_types
+Date: January, 2024
+KernelVersion: v6.9
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (RO) Prints the CXL protocol error types made available by
+ the platform in the format:
+
+ 0x<error number> <error type>
+
+ The possible error types are (as of ACPI v6.5):
+
+ 0x1000 CXL.cache Protocol Correctable
+ 0x2000 CXL.cache Protocol Uncorrectable non-fatal
+ 0x4000 CXL.cache Protocol Uncorrectable fatal
+ 0x8000 CXL.mem Protocol Correctable
+ 0x10000 CXL.mem Protocol Uncorrectable non-fatal
+ 0x20000 CXL.mem Protocol Uncorrectable fatal
+
+ The <error number> can be written to einj_inject to inject
+ <error type> into a chosen dport.
+
+What: /sys/kernel/debug/cxl/$dport_dev/einj_inject
+Date: January, 2024
+KernelVersion: v6.9
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (WO) Writing an integer to this file injects the corresponding
+ CXL protocol error into $dport_dev ($dport_dev will be a device
+ name from /sys/bus/pci/devices). The integer to type mapping for
+ injection can be found by reading from einj_types. If the dport
+ was enumerated in RCH mode, a CXL 1.1 error is injected, otherwise
+ a CXL 2.0 error is injected.
diff --git a/Documentation/ABI/testing/debugfs-driver-habanalabs b/Documentation/ABI/testing/debugfs-driver-habanalabs
index a7a432dc4015..3318a14f35b9 100644
--- a/Documentation/ABI/testing/debugfs-driver-habanalabs
+++ b/Documentation/ABI/testing/debugfs-driver-habanalabs
@@ -217,7 +217,7 @@ Description: Displays the hop values and physical address for a given ASID
and virtual address. The user should write the ASID and VA into
the file and then read the file to get the result.
e.g. to display info about VA 0x1000 for ASID 1 you need to do:
- echo "1 0x1000" > /sys/kernel/debug/accel/0/mmu
+ echo "1 0x1000" > /sys/kernel/debug/accel/<parent_device>/mmu
What: /sys/kernel/debug/accel/<parent_device>/mmu_error
Date: Mar 2021
@@ -226,8 +226,8 @@ Contact: fkassabri@habana.ai
Description: Check and display page fault or access violation mmu errors for
all MMUs specified in mmu_cap_mask.
e.g. to display error info for MMU hw cap bit 9, you need to do:
- echo "0x200" > /sys/kernel/debug/accel/0/mmu_error
- cat /sys/kernel/debug/accel/0/mmu_error
+ echo "0x200" > /sys/kernel/debug/accel/<parent_device>/mmu_error
+ cat /sys/kernel/debug/accel/<parent_device>/mmu_error
What: /sys/kernel/debug/accel/<parent_device>/monitor_dump
Date: Mar 2022
@@ -253,6 +253,12 @@ Description: Triggers dump of monitor data. The value to trigger the operatio
When the write is finished, the user can read the "monitor_dump"
blob
+What: /sys/kernel/debug/accel/<parent_device>/server_type
+Date: Feb 2024
+KernelVersion: 6.11
+Contact: trisin@habana.ai
+Description: Exposes the device's server type, maps to enum hl_server_type.
+
What: /sys/kernel/debug/accel/<parent_device>/set_power_state
Date: Jan 2019
KernelVersion: 5.1
diff --git a/Documentation/ABI/testing/debugfs-driver-qat b/Documentation/ABI/testing/debugfs-driver-qat
index b2db010d851e..3f1efbbad6ca 100644
--- a/Documentation/ABI/testing/debugfs-driver-qat
+++ b/Documentation/ABI/testing/debugfs-driver-qat
@@ -67,7 +67,7 @@ Contact: qat-linux@intel.com
Description: (RO) Read returns power management information specific to the
QAT device.
- This attribute is only available for qat_4xxx devices.
+ This attribute is only available for qat_4xxx and qat_6xxx devices.
What: /sys/kernel/debug/qat_<device>_<BDF>/cnv_errors
Date: January 2024
@@ -81,3 +81,29 @@ Description: (RO) Read returns, for each Acceleration Engine (AE), the number
<N>: Number of Compress and Verify (CnV) errors and type
of the last CnV error detected by Acceleration
Engine N.
+
+What: /sys/kernel/debug/qat_<device>_<BDF>/heartbeat/inject_error
+Date: March 2024
+KernelVersion: 6.8
+Contact: qat-linux@intel.com
+Description: (WO) Write to inject an error that simulates an heartbeat
+ failure. This is to be used for testing purposes.
+
+ After writing this file, the driver stops arbitration on a
+ random engine and disables the fetching of heartbeat counters.
+ If a workload is running on the device, a job submitted to the
+ accelerator might not get a response and a read of the
+ `heartbeat/status` attribute might report -1, i.e. device
+ unresponsive.
+ The error is unrecoverable thus the device must be restarted to
+ restore its functionality.
+
+ This attribute is available only when the kernel is built with
+ CONFIG_CRYPTO_DEV_QAT_ERROR_INJECTION=y.
+
+ A write of 1 enables error injection.
+
+ The following example shows how to enable error injection::
+
+ # cd /sys/kernel/debug/qat_<device>_<BDF>
+ # echo 1 > heartbeat/inject_error
diff --git a/Documentation/ABI/testing/debugfs-driver-qat_telemetry b/Documentation/ABI/testing/debugfs-driver-qat_telemetry
index eacee2072088..06097ee0f154 100644
--- a/Documentation/ABI/testing/debugfs-driver-qat_telemetry
+++ b/Documentation/ABI/testing/debugfs-driver-qat_telemetry
@@ -32,7 +32,7 @@ Description: (RW) Enables/disables the reporting of telemetry metrics.
echo 0 > /sys/kernel/debug/qat_4xxx_0000:6b:00.0/telemetry/control
- This attribute is only available for qat_4xxx devices.
+ This attribute is only available for qat_4xxx and qat_6xxx devices.
What: /sys/kernel/debug/qat_<device>_<BDF>/telemetry/device_data
Date: March 2024
@@ -57,6 +57,7 @@ Description: (RO) Reports device telemetry counters.
gp_lat_acc_avg average get to put latency [ns]
bw_in PCIe, write bandwidth [Mbps]
bw_out PCIe, read bandwidth [Mbps]
+ re_acc_avg average ring empty time [ns]
at_page_req_lat_avg Address Translator(AT), average page
request latency [ns]
at_trans_lat_avg AT, average page translation latency [ns]
@@ -67,6 +68,10 @@ Description: (RO) Reports device telemetry counters.
exec_xlt<N> execution count of Translator slice N
util_dcpr<N> utilization of Decompression slice N [%]
exec_dcpr<N> execution count of Decompression slice N
+ util_cnv<N> utilization of Compression and verify slice N [%]
+ exec_cnv<N> execution count of Compression and verify slice N
+ util_dcprz<N> utilization of Decompression slice N [%]
+ exec_dcprz<N> execution count of Decompression slice N
util_pke<N> utilization of PKE N [%]
exec_pke<N> execution count of PKE N
util_ucs<N> utilization of UCS slice N [%]
@@ -81,6 +86,32 @@ Description: (RO) Reports device telemetry counters.
exec_cph<N> execution count of Cipher slice N
util_ath<N> utilization of Authentication slice N [%]
exec_ath<N> execution count of Authentication slice N
+ cmdq_wait_cnv<N> wait time for cmdq N to get Compression and verify
+ slice ownership
+ cmdq_exec_cnv<N> Compression and verify slice execution time while
+ owned by cmdq N
+ cmdq_drain_cnv<N> time taken for cmdq N to release Compression and
+ verify slice ownership
+ cmdq_wait_dcprz<N> wait time for cmdq N to get Decompression
+ slice N ownership
+ cmdq_exec_dcprz<N> Decompression slice execution time while
+ owned by cmdq N
+ cmdq_drain_dcprz<N> time taken for cmdq N to release Decompression
+ slice ownership
+ cmdq_wait_pke<N> wait time for cmdq N to get PKE slice ownership
+ cmdq_exec_pke<N> PKE slice execution time while owned by cmdq N
+ cmdq_drain_pke<N> time taken for cmdq N to release PKE slice
+ ownership
+ cmdq_wait_ucs<N> wait time for cmdq N to get UCS slice ownership
+ cmdq_exec_ucs<N> UCS slice execution time while owned by cmdq N
+ cmdq_drain_ucs<N> time taken for cmdq N to release UCS slice
+ ownership
+ cmdq_wait_ath<N> wait time for cmdq N to get Authentication slice
+ ownership
+ cmdq_exec_ath<N> Authentication slice execution time while owned
+ by cmdq N
+ cmdq_drain_ath<N> time taken for cmdq N to release Authentication
+ slice ownership
======================= ========================================
The telemetry report file can be read with the following command::
@@ -100,7 +131,7 @@ Description: (RO) Reports device telemetry counters.
If a device lacks of a specific accelerator, the corresponding
attribute is not reported.
- This attribute is only available for qat_4xxx devices.
+ This attribute is only available for qat_4xxx and qat_6xxx devices.
What: /sys/kernel/debug/qat_<device>_<BDF>/telemetry/rp_<A/B/C/D>_data
Date: March 2024
@@ -225,4 +256,4 @@ Description: (RW) Selects up to 4 Ring Pairs (RP) to monitor, one per file,
``rp2srv`` from sysfs.
See Documentation/ABI/testing/sysfs-driver-qat for details.
- This attribute is only available for qat_4xxx devices.
+ This attribute is only available for qat_4xxx and qat_6xxx devices.
diff --git a/Documentation/ABI/testing/debugfs-dwc-pcie b/Documentation/ABI/testing/debugfs-dwc-pcie
new file mode 100644
index 000000000000..92b76f52a408
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-dwc-pcie
@@ -0,0 +1,157 @@
+What: /sys/kernel/debug/dwc_pcie_<dev>/rasdes_debug/lane_detect
+Date: February 2025
+Contact: Shradha Todi <shradha.t@samsung.com>
+Description: (RW) Write the lane number to be checked for detection. Read
+ will return whether PHY indicates receiver detection on the
+ selected lane. The default selected lane is Lane0.
+
+What: /sys/kernel/debug/dwc_pcie_<dev>/rasdes_debug/rx_valid
+Date: February 2025
+Contact: Shradha Todi <shradha.t@samsung.com>
+Description: (RW) Write the lane number to be checked as valid or invalid.
+ Read will return the status of PIPE RXVALID signal of the
+ selected lane. The default selected lane is Lane0.
+
+What: /sys/kernel/debug/dwc_pcie_<dev>/rasdes_err_inj/<error>
+Date: February 2025
+Contact: Shradha Todi <shradha.t@samsung.com>
+Description: The "rasdes_err_inj" is a directory which can be used to inject
+ errors into the system. The possible errors that can be injected
+ are:
+
+ 1) tx_lcrc - TLP LCRC error injection TX Path
+ 2) b16_crc_dllp - 16b CRC error injection of ACK/NAK DLLP
+ 3) b16_crc_upd_fc - 16b CRC error injection of Update-FC DLLP
+ 4) tx_ecrc - TLP ECRC error injection TX Path
+ 5) fcrc_tlp - TLP's FCRC error injection TX Path
+ 6) parity_tsos - Parity error of TSOS
+ 7) parity_skpos - Parity error on SKPOS
+ 8) rx_lcrc - LCRC error injection RX Path
+ 9) rx_ecrc - ECRC error injection RX Path
+ 10) tlp_err_seq - TLPs SEQ# error
+ 11) ack_nak_dllp_seq - DLLPS ACK/NAK SEQ# error
+ 12) ack_nak_dllp - ACK/NAK DLLPs transmission block
+ 13) upd_fc_dllp - UpdateFC DLLPs transmission block
+ 14) nak_dllp - Always transmission for NAK DLLP
+ 15) inv_sync_hdr_sym - Invert SYNC header
+ 16) com_pad_ts1 - COM/PAD TS1 order set
+ 17) com_pad_ts2 - COM/PAD TS2 order set
+ 18) com_fts - COM/FTS FTS order set
+ 19) com_idl - COM/IDL E-idle order set
+ 20) end_edb - END/EDB symbol
+ 21) stp_sdp - STP/SDP symbol
+ 22) com_skp - COM/SKP SKP order set
+ 23) posted_tlp_hdr - Posted TLP Header credit value control
+ 24) non_post_tlp_hdr - Non-Posted TLP Header credit value control
+ 25) cmpl_tlp_hdr - Completion TLP Header credit value control
+ 26) posted_tlp_data - Posted TLP Data credit value control
+ 27) non_post_tlp_data - Non-Posted TLP Data credit value control
+ 28) cmpl_tlp_data - Completion TLP Data credit value control
+ 29) duplicate_tlp - Generates duplicate TLPs
+ 30) nullified_tlp - Generates Nullified TLPs
+
+ (WO) Write to the attribute will prepare controller to inject
+ the respective error in the next transmission of data.
+
+ Parameter required to write will change in the following ways:
+
+ - Errors 9 and 10 are sequence errors. The write command:
+
+ echo <count> <diff> > /sys/kernel/debug/dwc_pcie_<dev>/rasdes_err_inj/<error>
+
+ <count>
+ Number of errors to be injected
+ <diff>
+ The difference to add or subtract from natural
+ sequence number to generate sequence error.
+ Allowed range from -4095 to 4095
+
+ - Errors 23 to 28 are credit value error insertions. The write
+ command:
+
+ echo <count> <diff> <vc> > /sys/kernel/debug/dwc_pcie_<dev>/rasdes_err_inj/<error>
+
+ <count>
+ Number of errors to be injected
+ <diff>
+ The difference to add or subtract from UpdateFC
+ credit value. Allowed range from -4095 to 4095
+ <vc>
+ Target VC number
+
+ - All other errors. The write command:
+
+ echo <count> > /sys/kernel/debug/dwc_pcie_<dev>/rasdes_err_inj/<error>
+
+ <count>
+ Number of errors to be injected
+
+What: /sys/kernel/debug/dwc_pcie_<dev>/rasdes_event_counters/<event>/counter_enable
+Date: February 2025
+Contact: Shradha Todi <shradha.t@samsung.com>
+Description: The "rasdes_event_counters" is the directory which can be used
+ to collect statistical data about the number of times a certain
+ event has occurred in the controller. The list of possible
+ events are:
+
+ 1) EBUF Overflow
+ 2) EBUF Underrun
+ 3) Decode Error
+ 4) Running Disparity Error
+ 5) SKP OS Parity Error
+ 6) SYNC Header Error
+ 7) Rx Valid De-assertion
+ 8) CTL SKP OS Parity Error
+ 9) 1st Retimer Parity Error
+ 10) 2nd Retimer Parity Error
+ 11) Margin CRC and Parity Error
+ 12) Detect EI Infer
+ 13) Receiver Error
+ 14) RX Recovery Req
+ 15) N_FTS Timeout
+ 16) Framing Error
+ 17) Deskew Error
+ 18) Framing Error In L0
+ 19) Deskew Uncompleted Error
+ 20) Bad TLP
+ 21) LCRC Error
+ 22) Bad DLLP
+ 23) Replay Number Rollover
+ 24) Replay Timeout
+ 25) Rx Nak DLLP
+ 26) Tx Nak DLLP
+ 27) Retry TLP
+ 28) FC Timeout
+ 29) Poisoned TLP
+ 30) ECRC Error
+ 31) Unsupported Request
+ 32) Completer Abort
+ 33) Completion Timeout
+ 34) EBUF SKP Add
+ 35) EBUF SKP Del
+
+ (RW) Write 1 to enable the event counter and write 0 to disable
+ the event counter. Read will return whether the counter is
+ currently enabled or disabled. Counter is disabled by default.
+
+What: /sys/kernel/debug/dwc_pcie_<dev>/rasdes_event_counters/<event>/counter_value
+Date: February 2025
+Contact: Shradha Todi <shradha.t@samsung.com>
+Description: (RO) Read will return the current value of the event counter.
+ To reset the counter, counter should be disabled first and then
+ enabled back using the "counter_enable" attribute.
+
+What: /sys/kernel/debug/dwc_pcie_<dev>/rasdes_event_counters/<event>/lane_select
+Date: February 2025
+Contact: Shradha Todi <shradha.t@samsung.com>
+Description: (RW) Some lanes in the event list are lane specific events.
+ These include events from 1 to 11, as well as, 34 and 35. Write
+ the lane number for which you wish the counter to be enabled,
+ disabled, or value dumped. Read will return the current
+ selected lane number. Lane0 is selected by default.
+
+What: /sys/kernel/debug/dwc_pcie_<dev>/ltssm_status
+Date: February 2025
+Contact: Hans Zhang <18255117159@163.com>
+Description: (RO) Read will return the current PCIe LTSSM state in both
+ string and raw value.
diff --git a/Documentation/ABI/testing/debugfs-hisi-hpre b/Documentation/ABI/testing/debugfs-hisi-hpre
index 8e8de49c5cc6..29fb7d5ffc69 100644
--- a/Documentation/ABI/testing/debugfs-hisi-hpre
+++ b/Documentation/ABI/testing/debugfs-hisi-hpre
@@ -111,6 +111,28 @@ Description: QM debug registers(regs) read hardware register value. This
node is used to show the change of the qm register values. This
node can be help users to check the change of register values.
+What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/qm_state
+Date: Jan 2024
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the state of the device.
+ 0: busy, 1: idle.
+ Only available for PF, and take no other effect on HPRE.
+
+What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/dev_timeout
+Date: Feb 2024
+Contact: linux-crypto@vger.kernel.org
+Description: Set the wait time when stop queue fails. Available for both PF
+ and VF, and take no other effect on HPRE.
+ 0: not wait(default), others value: wait dev_timeout * 20 microsecond.
+
+What: /sys/kernel/debug/hisi_hpre/<bdf>/qm/dev_state
+Date: Feb 2024
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the stop queue status of the QM. The default value is 0,
+ if dev_timeout is set, when stop queue fails, the dev_state
+ will return non-zero value. Available for both PF and VF,
+ and take no other effect on HPRE.
+
What: /sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/diff_regs
Date: Mar 2022
Contact: linux-crypto@vger.kernel.org
@@ -162,3 +184,10 @@ Date: Apr 2020
Contact: linux-crypto@vger.kernel.org
Description: Dump the total number of time out requests.
Available for both PF and VF, and take no other effect on HPRE.
+
+What: /sys/kernel/debug/hisi_hpre/<bdf>/cap_regs
+Date: Oct 2024
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the values of the qm and hpre capability bit registers and
+ support the query of device specifications to facilitate fault locating.
+ Available for both PF and VF, and take no other effect on HPRE.
diff --git a/Documentation/ABI/testing/debugfs-hisi-migration b/Documentation/ABI/testing/debugfs-hisi-migration
new file mode 100644
index 000000000000..2c01b2d387dd
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-hisi-migration
@@ -0,0 +1,25 @@
+What: /sys/kernel/debug/vfio/<device>/migration/hisi_acc/dev_data
+Date: Jan 2025
+KernelVersion: 6.13
+Contact: Longfang Liu <liulongfang@huawei.com>
+Description: Read the configuration data and some status data
+ required for device live migration. These data include device
+ status data, queue configuration data, some task configuration
+ data and device attribute data. The output format of the data
+ is defined by the live migration driver.
+
+What: /sys/kernel/debug/vfio/<device>/migration/hisi_acc/migf_data
+Date: Jan 2025
+KernelVersion: 6.13
+Contact: Longfang Liu <liulongfang@huawei.com>
+Description: Read the data from the last completed live migration.
+ This data includes the same device status data as in "dev_data".
+ The migf_data is the dev_data that is migrated.
+
+What: /sys/kernel/debug/vfio/<device>/migration/hisi_acc/cmd_state
+Date: Jan 2025
+KernelVersion: 6.13
+Contact: Longfang Liu <liulongfang@huawei.com>
+Description: Used to obtain the device command sending and receiving
+ channel status. Returns failure or success logs based on the
+ results.
diff --git a/Documentation/ABI/testing/debugfs-hisi-sec b/Documentation/ABI/testing/debugfs-hisi-sec
index deeefe2c735e..82bf4a0dc7f7 100644
--- a/Documentation/ABI/testing/debugfs-hisi-sec
+++ b/Documentation/ABI/testing/debugfs-hisi-sec
@@ -91,6 +91,28 @@ Description: QM debug registers(regs) read hardware register value. This
node is used to show the change of the qm register values. This
node can be help users to check the change of register values.
+What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/qm_state
+Date: Jan 2024
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the state of the device.
+ 0: busy, 1: idle.
+ Only available for PF, and take no other effect on SEC.
+
+What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/dev_timeout
+Date: Feb 2024
+Contact: linux-crypto@vger.kernel.org
+Description: Set the wait time when stop queue fails. Available for both PF
+ and VF, and take no other effect on SEC.
+ 0: not wait(default), others value: wait dev_timeout * 20 microsecond.
+
+What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/dev_state
+Date: Feb 2024
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the stop queue status of the QM. The default value is 0,
+ if dev_timeout is set, when stop queue fails, the dev_state
+ will return non-zero value. Available for both PF and VF,
+ and take no other effect on SEC.
+
What: /sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/diff_regs
Date: Mar 2022
Contact: linux-crypto@vger.kernel.org
@@ -135,3 +157,10 @@ Contact: linux-crypto@vger.kernel.org
Description: Dump the total number of completed but marked error requests
to be received.
Available for both PF and VF, and take no other effect on SEC.
+
+What: /sys/kernel/debug/hisi_sec2/<bdf>/cap_regs
+Date: Oct 2024
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the values of the qm and sec capability bit registers and
+ support the query of device specifications to facilitate fault locating.
+ Available for both PF and VF, and take no other effect on SEC.
diff --git a/Documentation/ABI/testing/debugfs-hisi-zip b/Documentation/ABI/testing/debugfs-hisi-zip
index 593714afaed2..0abd65d27e9b 100644
--- a/Documentation/ABI/testing/debugfs-hisi-zip
+++ b/Documentation/ABI/testing/debugfs-hisi-zip
@@ -104,6 +104,28 @@ Description: QM debug registers(regs) read hardware register value. This
node is used to show the change of the qm registers value. This
node can be help users to check the change of register values.
+What: /sys/kernel/debug/hisi_zip/<bdf>/qm/qm_state
+Date: Jan 2024
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the state of the device.
+ 0: busy, 1: idle.
+ Only available for PF, and take no other effect on ZIP.
+
+What: /sys/kernel/debug/hisi_zip/<bdf>/qm/dev_timeout
+Date: Feb 2024
+Contact: linux-crypto@vger.kernel.org
+Description: Set the wait time when stop queue fails. Available for both PF
+ and VF, and take no other effect on ZIP.
+ 0: not wait(default), others value: wait dev_timeout * 20 microsecond.
+
+What: /sys/kernel/debug/hisi_zip/<bdf>/qm/dev_state
+Date: Feb 2024
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the stop queue status of the QM. The default value is 0,
+ if dev_timeout is set, when stop queue fails, the dev_state
+ will return non-zero value. Available for both PF and VF,
+ and take no other effect on ZIP.
+
What: /sys/kernel/debug/hisi_zip/<bdf>/zip_dfx/diff_regs
Date: Mar 2022
Contact: linux-crypto@vger.kernel.org
@@ -136,3 +158,10 @@ Contact: linux-crypto@vger.kernel.org
Description: Dump the total number of BD type error requests
to be received.
Available for both PF and VF, and take no other effect on ZIP.
+
+What: /sys/kernel/debug/hisi_zip/<bdf>/cap_regs
+Date: Oct 2024
+Contact: linux-crypto@vger.kernel.org
+Description: Dump the values of the qm and zip capability bit registers and
+ support the query of device specifications to facilitate fault locating.
+ Available for both PF and VF, and take no other effect on ZIP.
diff --git a/Documentation/ABI/testing/debugfs-iio-ad9467 b/Documentation/ABI/testing/debugfs-iio-ad9467
new file mode 100644
index 000000000000..0352fca1f7f2
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-iio-ad9467
@@ -0,0 +1,39 @@
+What: /sys/kernel/debug/iio/iio:deviceX/calibration_table_dump
+KernelVersion: 6.11
+Contact: linux-iio@vger.kernel.org
+Description:
+ This dumps the calibration table that was filled during the
+ digital interface tuning process.
+
+What: /sys/kernel/debug/iio/iio:deviceX/in_voltage_test_mode_available
+KernelVersion: 6.11
+Contact: linux-iio@vger.kernel.org
+Description:
+ List all the available test tones:
+ - off
+ - midscale_short
+ - pos_fullscale
+ - neg_fullscale
+ - checkerboard
+ - prbs23
+ - prbs9
+ - one_zero_toggle
+ - user
+ - bit_toggle
+ - sync
+ - one_bit_high
+ - mixed_bit_frequency
+ - ramp
+
+ Note that depending on the actual device being used, some of the
+ above might not be available (and they won't be listed when
+ reading the file).
+
+What: /sys/kernel/debug/iio/iio:deviceX/in_voltageY_test_mode
+KernelVersion: 6.11
+Contact: linux-iio@vger.kernel.org
+Description:
+ Writing to this file will initiate one of available test tone on
+ channel Y. Reading it, shows which test is running. In cases
+ where an IIO backend is available and supports the test tone,
+ additional information about the data correctness is given.
diff --git a/Documentation/ABI/testing/debugfs-iio-backend b/Documentation/ABI/testing/debugfs-iio-backend
new file mode 100644
index 000000000000..01ab94469432
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-iio-backend
@@ -0,0 +1,20 @@
+What: /sys/kernel/debug/iio/iio:deviceX/backendY/name
+KernelVersion: 6.11
+Contact: linux-iio@vger.kernel.org
+Description:
+ Name of Backend Y connected to device X.
+
+What: /sys/kernel/debug/iio/iio:deviceX/backendY/direct_reg_access
+KernelVersion: 6.11
+Contact: linux-iio@vger.kernel.org
+Description:
+ Directly access the registers of backend Y. Typical usage is:
+
+ Reading address 0x50
+ echo 0x50 > direct_reg_access
+ cat direct_reg_access
+
+ Writing address 0x50
+ echo 0x50 0x3 > direct_reg_access
+ //readback address 0x50
+ cat direct_reg_access
diff --git a/Documentation/ABI/testing/debugfs-intel-iommu b/Documentation/ABI/testing/debugfs-intel-iommu
new file mode 100644
index 000000000000..2ab8464504a9
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-intel-iommu
@@ -0,0 +1,276 @@
+What: /sys/kernel/debug/iommu/intel/iommu_regset
+Date: December 2023
+Contact: Jingqi Liu <Jingqi.liu@intel.com>
+Description:
+ This file dumps all the register contents for each IOMMU device.
+
+ Example in Kabylake:
+
+ ::
+
+ $ sudo cat /sys/kernel/debug/iommu/intel/iommu_regset
+
+ IOMMU: dmar0 Register Base Address: 26be37000
+
+ Name Offset Contents
+ VER 0x00 0x0000000000000010
+ GCMD 0x18 0x0000000000000000
+ GSTS 0x1c 0x00000000c7000000
+ FSTS 0x34 0x0000000000000000
+ FECTL 0x38 0x0000000000000000
+
+ [...]
+
+ IOMMU: dmar1 Register Base Address: fed90000
+
+ Name Offset Contents
+ VER 0x00 0x0000000000000010
+ GCMD 0x18 0x0000000000000000
+ GSTS 0x1c 0x00000000c7000000
+ FSTS 0x34 0x0000000000000000
+ FECTL 0x38 0x0000000000000000
+
+ [...]
+
+ IOMMU: dmar2 Register Base Address: fed91000
+
+ Name Offset Contents
+ VER 0x00 0x0000000000000010
+ GCMD 0x18 0x0000000000000000
+ GSTS 0x1c 0x00000000c7000000
+ FSTS 0x34 0x0000000000000000
+ FECTL 0x38 0x0000000000000000
+
+ [...]
+
+What: /sys/kernel/debug/iommu/intel/ir_translation_struct
+Date: December 2023
+Contact: Jingqi Liu <Jingqi.liu@intel.com>
+Description:
+ This file dumps the table entries for Interrupt
+ remapping and Interrupt posting.
+
+ Example in Kabylake:
+
+ ::
+
+ $ sudo cat /sys/kernel/debug/iommu/intel/ir_translation_struct
+
+ Remapped Interrupt supported on IOMMU: dmar0
+ IR table address:100900000
+
+ Entry SrcID DstID Vct IRTE_high IRTE_low
+ 0 00:0a.0 00000080 24 0000000000040050 000000800024000d
+ 1 00:0a.0 00000001 ef 0000000000040050 0000000100ef000d
+
+ Remapped Interrupt supported on IOMMU: dmar1
+ IR table address:100300000
+ Entry SrcID DstID Vct IRTE_high IRTE_low
+ 0 00:02.0 00000002 26 0000000000040010 000000020026000d
+
+ [...]
+
+ ****
+
+ Posted Interrupt supported on IOMMU: dmar0
+ IR table address:100900000
+ Entry SrcID PDA_high PDA_low Vct IRTE_high IRTE_low
+
+What: /sys/kernel/debug/iommu/intel/dmar_translation_struct
+Date: December 2023
+Contact: Jingqi Liu <Jingqi.liu@intel.com>
+Description:
+ This file dumps Intel IOMMU DMA remapping tables, such
+ as root table, context table, PASID directory and PASID
+ table entries in debugfs. For legacy mode, it doesn't
+ support PASID, and hence PASID field is defaulted to
+ '-1' and other PASID related fields are invalid.
+
+ Example in Kabylake:
+
+ ::
+
+ $ sudo cat /sys/kernel/debug/iommu/intel/dmar_translation_struct
+
+ IOMMU dmar1: Root Table Address: 0x103027000
+ B.D.F Root_entry
+ 00:02.0 0x0000000000000000:0x000000010303e001
+
+ Context_entry
+ 0x0000000000000102:0x000000010303f005
+
+ PASID PASID_table_entry
+ -1 0x0000000000000000:0x0000000000000000:0x0000000000000000
+
+ IOMMU dmar0: Root Table Address: 0x103028000
+ B.D.F Root_entry
+ 00:0a.0 0x0000000000000000:0x00000001038a7001
+
+ Context_entry
+ 0x0000000000000000:0x0000000103220e7d
+
+ PASID PASID_table_entry
+ 0 0x0000000000000000:0x0000000000800002:0x00000001038a5089
+
+ [...]
+
+What: /sys/kernel/debug/iommu/intel/invalidation_queue
+Date: December 2023
+Contact: Jingqi Liu <Jingqi.liu@intel.com>
+Description:
+ This file exports invalidation queue internals of each
+ IOMMU device.
+
+ Example in Kabylake:
+
+ ::
+
+ $ sudo cat /sys/kernel/debug/iommu/intel/invalidation_queue
+
+ Invalidation queue on IOMMU: dmar0
+ Base: 0x10022e000 Head: 20 Tail: 20
+ Index qw0 qw1 qw2
+ 0 0000000000000014 0000000000000000 0000000000000000
+ 1 0000000200000025 0000000100059c04 0000000000000000
+ 2 0000000000000014 0000000000000000 0000000000000000
+
+ qw3 status
+ 0000000000000000 0000000000000000
+ 0000000000000000 0000000000000000
+ 0000000000000000 0000000000000000
+
+ [...]
+
+ Invalidation queue on IOMMU: dmar1
+ Base: 0x10026e000 Head: 32 Tail: 32
+ Index qw0 qw1 status
+ 0 0000000000000004 0000000000000000 0000000000000000
+ 1 0000000200000025 0000000100059804 0000000000000000
+ 2 0000000000000011 0000000000000000 0000000000000000
+
+ [...]
+
+What: /sys/kernel/debug/iommu/intel/dmar_perf_latency
+Date: December 2023
+Contact: Jingqi Liu <Jingqi.liu@intel.com>
+Description:
+ This file is used to control and show counts of
+ execution time ranges for various types per DMAR.
+
+ Firstly, write a value to
+ /sys/kernel/debug/iommu/intel/dmar_perf_latency
+ to enable sampling.
+
+ The possible values are as follows:
+
+ * 0 - disable sampling all latency data
+
+ * 1 - enable sampling IOTLB invalidation latency data
+
+ * 2 - enable sampling devTLB invalidation latency data
+
+ * 3 - enable sampling intr entry cache invalidation latency data
+
+ Next, read /sys/kernel/debug/iommu/intel/dmar_perf_latency gives
+ a snapshot of sampling result of all enabled monitors.
+
+ Examples in Kabylake:
+
+ ::
+
+ 1) Disable sampling all latency data:
+
+ $ sudo echo 0 > /sys/kernel/debug/iommu/intel/dmar_perf_latency
+
+ 2) Enable sampling IOTLB invalidation latency data
+
+ $ sudo echo 1 > /sys/kernel/debug/iommu/intel/dmar_perf_latency
+
+ $ sudo cat /sys/kernel/debug/iommu/intel/dmar_perf_latency
+
+ IOMMU: dmar0 Register Base Address: 26be37000
+ <0.1us 0.1us-1us 1us-10us 10us-100us 100us-1ms
+ inv_iotlb 0 0 0 0 0
+
+ 1ms-10ms >=10ms min(us) max(us) average(us)
+ inv_iotlb 0 0 0 0 0
+
+ [...]
+
+ IOMMU: dmar2 Register Base Address: fed91000
+ <0.1us 0.1us-1us 1us-10us 10us-100us 100us-1ms
+ inv_iotlb 0 0 18 0 0
+
+ 1ms-10ms >=10ms min(us) max(us) average(us)
+ inv_iotlb 0 0 2 2 2
+
+ 3) Enable sampling devTLB invalidation latency data
+
+ $ sudo echo 2 > /sys/kernel/debug/iommu/intel/dmar_perf_latency
+
+ $ sudo cat /sys/kernel/debug/iommu/intel/dmar_perf_latency
+
+ IOMMU: dmar0 Register Base Address: 26be37000
+ <0.1us 0.1us-1us 1us-10us 10us-100us 100us-1ms
+ inv_devtlb 0 0 0 0 0
+
+ >=10ms min(us) max(us) average(us)
+ inv_devtlb 0 0 0 0
+
+ [...]
+
+What: /sys/kernel/debug/iommu/intel/<bdf>/domain_translation_struct
+Date: December 2023
+Contact: Jingqi Liu <Jingqi.liu@intel.com>
+Description:
+ This file dumps a specified page table of Intel IOMMU
+ in legacy mode or scalable mode.
+
+ For a device that only supports legacy mode, dump its
+ page table by the debugfs file in the debugfs device
+ directory. e.g.
+ /sys/kernel/debug/iommu/intel/0000:00:02.0/domain_translation_struct.
+
+ For a device that supports scalable mode, dump the
+ page table of specified pasid by the debugfs file in
+ the debugfs pasid directory. e.g.
+ /sys/kernel/debug/iommu/intel/0000:00:02.0/1/domain_translation_struct.
+
+ Examples in Kabylake:
+
+ ::
+
+ 1) Dump the page table of device "0000:00:02.0" that only supports legacy mode.
+
+ $ sudo cat /sys/kernel/debug/iommu/intel/0000:00:02.0/domain_translation_struct
+
+ Device 0000:00:02.0 @0x1017f8000
+ IOVA_PFN PML5E PML4E
+ 0x000000008d800 | 0x0000000000000000 0x00000001017f9003
+ 0x000000008d801 | 0x0000000000000000 0x00000001017f9003
+ 0x000000008d802 | 0x0000000000000000 0x00000001017f9003
+
+ PDPE PDE PTE
+ 0x00000001017fa003 0x00000001017fb003 0x000000008d800003
+ 0x00000001017fa003 0x00000001017fb003 0x000000008d801003
+ 0x00000001017fa003 0x00000001017fb003 0x000000008d802003
+
+ [...]
+
+ 2) Dump the page table of device "0000:00:0a.0" with PASID "1" that
+ supports scalable mode.
+
+ $ sudo cat /sys/kernel/debug/iommu/intel/0000:00:0a.0/1/domain_translation_struct
+
+ Device 0000:00:0a.0 with pasid 1 @0x10c112000
+ IOVA_PFN PML5E PML4E
+ 0x0000000000000 | 0x0000000000000000 0x000000010df93003
+ 0x0000000000001 | 0x0000000000000000 0x000000010df93003
+ 0x0000000000002 | 0x0000000000000000 0x000000010df93003
+
+ PDPE PDE PTE
+ 0x0000000106ae6003 0x0000000104b38003 0x0000000147c00803
+ 0x0000000106ae6003 0x0000000104b38003 0x0000000147c01803
+ 0x0000000106ae6003 0x0000000104b38003 0x0000000147c02803
+
+ [...]
diff --git a/Documentation/ABI/testing/debugfs-msi-wmi-platform b/Documentation/ABI/testing/debugfs-msi-wmi-platform
new file mode 100644
index 000000000000..71f9992168d8
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-msi-wmi-platform
@@ -0,0 +1,14 @@
+What: /sys/kernel/debug/msi-wmi-platform-<wmi_device_name>/*
+Date: April 2024
+KernelVersion: 6.10
+Contact: Armin Wolf <W_Armin@gmx.de>
+Description:
+ This file allows to execute the associated WMI method with the same name.
+
+ To start the execution, write a buffer containing the method arguments
+ at file offset 0. Partial writes or writes at a different offset are not
+ supported.
+
+ The buffer returned by the WMI method can then be read from the file.
+
+ See Documentation/wmi/devices/msi-wmi-platform.rst for details.
diff --git a/Documentation/ABI/testing/debugfs-pcie-ptm b/Documentation/ABI/testing/debugfs-pcie-ptm
new file mode 100644
index 000000000000..602d41363571
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-pcie-ptm
@@ -0,0 +1,70 @@
+What: /sys/kernel/debug/pcie_ptm_*/local_clock
+Date: May 2025
+Contact: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+Description:
+ (RO) PTM local clock in nanoseconds. Applicable for both Root
+ Complex and Endpoint controllers.
+
+What: /sys/kernel/debug/pcie_ptm_*/master_clock
+Date: May 2025
+Contact: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+Description:
+ (RO) PTM master clock in nanoseconds. Applicable only for
+ Endpoint controllers.
+
+What: /sys/kernel/debug/pcie_ptm_*/t1
+Date: May 2025
+Contact: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+Description:
+ (RO) PTM T1 timestamp in nanoseconds. Applicable only for
+ Endpoint controllers.
+
+What: /sys/kernel/debug/pcie_ptm_*/t2
+Date: May 2025
+Contact: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+Description:
+ (RO) PTM T2 timestamp in nanoseconds. Applicable only for
+ Root Complex controllers.
+
+What: /sys/kernel/debug/pcie_ptm_*/t3
+Date: May 2025
+Contact: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+Description:
+ (RO) PTM T3 timestamp in nanoseconds. Applicable only for
+ Root Complex controllers.
+
+What: /sys/kernel/debug/pcie_ptm_*/t4
+Date: May 2025
+Contact: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+Description:
+ (RO) PTM T4 timestamp in nanoseconds. Applicable only for
+ Endpoint controllers.
+
+What: /sys/kernel/debug/pcie_ptm_*/context_update
+Date: May 2025
+Contact: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+Description:
+ (RW) Control the PTM context update mode. Applicable only for
+ Endpoint controllers.
+
+ Following values are supported:
+
+ * auto = PTM context auto update trigger for every 10ms
+
+ * manual = PTM context manual update. Writing 'manual' to this
+ file triggers PTM context update (default)
+
+What: /sys/kernel/debug/pcie_ptm_*/context_valid
+Date: May 2025
+Contact: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+Description:
+ (RW) Control the PTM context validity (local clock timing).
+ Applicable only for Root Complex controllers. PTM context is
+ invalidated by hardware if the Root Complex enters low power
+ mode or changes link frequency.
+
+ Following values are supported:
+
+ * 0 = PTM context invalid (default)
+
+ * 1 = PTM context valid
diff --git a/Documentation/ABI/testing/debugfs-pktcdvd b/Documentation/ABI/testing/debugfs-pktcdvd
deleted file mode 100644
index f6f65a4faea0..000000000000
--- a/Documentation/ABI/testing/debugfs-pktcdvd
+++ /dev/null
@@ -1,18 +0,0 @@
-What: /sys/kernel/debug/pktcdvd/pktcdvd[0-7]
-Date: Oct. 2006
-KernelVersion: 2.6.20
-Contact: Thomas Maier <balagi@justmail.de>
-Description:
-
-The pktcdvd module (packet writing driver) creates
-these files in debugfs:
-
-/sys/kernel/debug/pktcdvd/pktcdvd[0-7]/
-
- ==== ====== ====================================
- info 0444 Lots of driver statistics and infos.
- ==== ====== ====================================
-
-Example::
-
- cat /sys/kernel/debug/pktcdvd/pktcdvd0/info
diff --git a/Documentation/ABI/testing/debugfs-scmi-raw b/Documentation/ABI/testing/debugfs-scmi-raw
index 97678cc9535c..5847b96b3896 100644
--- a/Documentation/ABI/testing/debugfs-scmi-raw
+++ b/Documentation/ABI/testing/debugfs-scmi-raw
@@ -31,6 +31,42 @@ Description: SCMI Raw asynchronous message injection/snooping facility; write
(receiving an EOF at each message boundary).
Users: Debugging, any userspace test suite
+What: /sys/kernel/debug/scmi/<n>/raw/message_poll
+Date: June 2025
+KernelVersion: 6.16
+Contact: cristian.marussi@arm.com
+Description: SCMI Raw message injection/snooping facility using polling mode;
+ write a complete SCMI command message (header included) in
+ little-endian binary format to have it sent to the configured
+ backend SCMI server for instance <n>, using polling mode on
+ the reception path. (if transport is polling capable)
+ Any subsequently received response can be read from this same
+ entry if it arrived within the configured timeout.
+ Each write to the entry causes one command request to be built
+ and sent while the replies are read back one message at time
+ (receiving an EOF at each message boundary).
+Users: Debugging, any userspace test suite
+
+What: /sys/kernel/debug/scmi/<n>/raw/message_poll_async
+Date: June 2025
+KernelVersion: 6.16
+Contact: cristian.marussi@arm.com
+Description: SCMI Raw asynchronous message injection/snooping facility using
+ polling-mode; write a complete SCMI asynchronous command message
+ (header included) in little-endian binary format to have it sent
+ to the configured backend SCMI server for instance <n>, using
+ polling-mode on the reception path of the immediate part of the
+ asynchronous command. (if transport is polling capable)
+ Any subsequently received response can be read from this same
+ entry if it arrived within the configured timeout.
+ Any additional delayed response received afterwards can be read
+ from this same entry too if it arrived within the configured
+ timeout.
+ Each write to the entry causes one command request to be built
+ and sent while the replies are read back one message at time
+ (receiving an EOF at each message boundary).
+Users: Debugging, any userspace test suite
+
What: /sys/kernel/debug/scmi/<n>/raw/errors
Date: March 2023
KernelVersion: 6.3
@@ -115,3 +151,58 @@ Description: SCMI Raw asynchronous message injection/snooping facility; write
exist only if the transport is configured to have more than
one default channel.
Users: Debugging, any userspace test suite
+
+
+What: /sys/kernel/debug/scmi/<n>/raw/channels/<m>/message_poll
+Date: June 2025
+KernelVersion: 6.16
+Contact: cristian.marussi@arm.com
+Description: SCMI Raw message injection/snooping facility using polling mode;
+ write a complete SCMI command message (header included) in
+ little-endian binary format to have it sent to the configured
+ backend SCMI server for instance <n> through the <m> transport
+ channel, using polling mode on the reception path.
+ (if transport is polling capable)
+ Any subsequently received response can be read from this same
+ entry if it arrived on channel <m> within the configured
+ timeout.
+ Each write to the entry causes one command request to be built
+ and sent while the replies are read back one message at time
+ (receiving an EOF at each message boundary).
+ Channel identifier <m> matches the SCMI protocol number which
+ has been associated with this transport channel in the DT
+ description, with base protocol number 0x10 being the default
+ channel for this instance.
+ Note that these per-channel entries rooted at <..>/channels
+ exist only if the transport is configured to have more than
+ one default channel.
+Users: Debugging, any userspace test suite
+
+What: /sys/kernel/debug/scmi/<n>/raw/channels/<m>/message_poll_async
+Date: June 2025
+KernelVersion: 6.16
+Contact: cristian.marussi@arm.com
+Description: SCMI Raw asynchronous message injection/snooping facility using
+ polling-mode; write a complete SCMI asynchronous command message
+ (header included) in little-endian binary format to have it sent
+ to the configured backend SCMI server for instance <n> through
+ the <m> transport channel, using polling mode on the reception
+ path of the immediate part of the asynchronous command.
+ (if transport is polling capable)
+ Any subsequently received response can be read from this same
+ entry if it arrived on channel <m> within the configured
+ timeout.
+ Any additional delayed response received afterwards can be read
+ from this same entry too if it arrived within the configured
+ timeout.
+ Each write to the entry causes one command request to be built
+ and sent while the replies are read back one message at time
+ (receiving an EOF at each message boundary).
+ Channel identifier <m> matches the SCMI protocol number which
+ has been associated with this transport channel in the DT
+ description, with base protocol number 0x10 being the default
+ channel for this instance.
+ Note that these per-channel entries rooted at <..>/channels
+ exist only if the transport is configured to have more than
+ one default channel.
+Users: Debugging, any userspace test suite
diff --git a/Documentation/ABI/testing/debugfs-tpmi b/Documentation/ABI/testing/debugfs-tpmi
index 597f0475fe6e..c493a1403d2f 100644
--- a/Documentation/ABI/testing/debugfs-tpmi
+++ b/Documentation/ABI/testing/debugfs-tpmi
@@ -29,3 +29,12 @@ Example:
echo 0,0x20,0xff > mem_write
echo 1,64,64 > mem_write
Users: Debugging, any user space test suite
+
+What: /sys/kernel/debug/tpmi-<n>/plr/domain<n>/status
+Date: Aug 2024
+KernelVersion: 6.11
+Contact: Tero Kristo <tero.kristo@linux.intel.com>
+Description:
+Shows the currently active Performance Limit Reasons for die level and the
+individual CPUs under the die. The contents of this file are sticky, and
+clearing all the statuses can be done by writing "0\n" to this file.
diff --git a/Documentation/ABI/testing/debugfs-turris-mox-rwtm b/Documentation/ABI/testing/debugfs-turris-mox-rwtm
deleted file mode 100644
index 813987d5de4e..000000000000
--- a/Documentation/ABI/testing/debugfs-turris-mox-rwtm
+++ /dev/null
@@ -1,14 +0,0 @@
-What: /sys/kernel/debug/turris-mox-rwtm/do_sign
-Date: Jun 2020
-KernelVersion: 5.8
-Contact: Marek Behún <kabel@kernel.org>
-Description:
-
- ======= ===========================================================
- (Write) Message to sign with the ECDSA private key stored in
- device's OTP. The message must be exactly 64 bytes
- (since this is intended for SHA-512 hashes).
- (Read) The resulting signature, 136 bytes. This contains the
- R and S values of the ECDSA signature, both in
- big-endian format.
- ======= ===========================================================
diff --git a/Documentation/ABI/testing/debugfs-vfio b/Documentation/ABI/testing/debugfs-vfio
index 90f7c262f591..70ec2d454686 100644
--- a/Documentation/ABI/testing/debugfs-vfio
+++ b/Documentation/ABI/testing/debugfs-vfio
@@ -23,3 +23,9 @@ Contact: Longfang Liu <liulongfang@huawei.com>
Description: Read the live migration status of the vfio device.
The contents of the state file reflects the migration state
relative to those defined in the vfio_device_mig_state enum
+
+What: /sys/kernel/debug/vfio/<device>/migration/features
+Date: Oct 2025
+KernelVersion: 6.18
+Contact: Cédric Le Goater <clg@redhat.com>
+Description: Read the migration features of the vfio device.
diff --git a/Documentation/ABI/testing/gpio-cdev b/Documentation/ABI/testing/gpio-cdev
index 66bdcd188b6c..c9689b2a6fed 100644
--- a/Documentation/ABI/testing/gpio-cdev
+++ b/Documentation/ABI/testing/gpio-cdev
@@ -6,8 +6,9 @@ Description:
The character device files /dev/gpiochip* are the interface
between GPIO chips and userspace.
- The ioctl(2)-based ABI is defined and documented in
- [include/uapi]<linux/gpio.h>.
+ The ioctl(2)-based ABI is defined in
+ [include/uapi]<linux/gpio.h> and documented in
+ Documentation/userspace-api/gpio/chardev.rst.
The following file operations are supported:
@@ -17,8 +18,8 @@ Description:
ioctl(2)
Initiate various actions.
- See the inline documentation in [include/uapi]<linux/gpio.h>
- for descriptions of all ioctls.
+ See Documentation/userspace-api/gpio/chardev.rst
+ for a description of all ioctls.
close(2)
Stops and free up the I/O contexts that was associated
diff --git a/Documentation/ABI/testing/ima_policy b/Documentation/ABI/testing/ima_policy
index c2385183826c..d4b3696a9efb 100644
--- a/Documentation/ABI/testing/ima_policy
+++ b/Documentation/ABI/testing/ima_policy
@@ -20,9 +20,10 @@ Description:
rule format: action [condition ...]
action: measure | dont_measure | appraise | dont_appraise |
- audit | hash | dont_hash
+ audit | dont_audit | hash | dont_hash
condition:= base | lsm [option]
base: [[func=] [mask=] [fsmagic=] [fsuuid=] [fsname=]
+ [fs_subtype=]
[uid=] [euid=] [gid=] [egid=]
[fowner=] [fgroup=]]
lsm: [[subj_user=] [subj_role=] [subj_type=]
diff --git a/Documentation/ABI/testing/sysfs-auxdisplay-linedisp b/Documentation/ABI/testing/sysfs-auxdisplay-linedisp
new file mode 100644
index 000000000000..55f1b559e84e
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-auxdisplay-linedisp
@@ -0,0 +1,90 @@
+What: /sys/.../message
+Date: October 2021
+KernelVersion: 5.16
+Description:
+ Controls the text message displayed on character line displays.
+
+ Reading returns the current message with a trailing newline.
+ Writing updates the displayed message. Messages longer than the
+ display width will automatically scroll. Trailing newlines in
+ input are automatically trimmed.
+
+ Writing an empty string clears the display.
+
+ Example:
+ echo "Hello World" > message
+ cat message # Returns "Hello World\n"
+
+What: /sys/.../num_chars
+Date: November 2025
+KernelVersion: 6.18
+Contact: Jean-François Lessard <jefflessard3@gmail.com>
+Description:
+ Read-only attribute showing the character width capacity of
+ the line display device. Messages longer than this will scroll.
+
+ Example:
+ cat num_chars # Returns "16\n" for 16-char display
+
+What: /sys/.../scroll_step_ms
+Date: October 2021
+KernelVersion: 5.16
+Description:
+ Controls the scrolling speed for messages longer than the display
+ width, specified in milliseconds per scroll step.
+
+ Setting to 0 disables scrolling. Default is 500ms.
+
+ Example:
+ echo "250" > scroll_step_ms # 4Hz scrolling
+ cat scroll_step_ms # Returns "250\n"
+
+What: /sys/.../map_seg7
+Date: January 2024
+KernelVersion: 6.9
+Description:
+ Read/write binary blob representing the ASCII-to-7-segment
+ display conversion table used by the linedisp driver, as defined
+ by struct seg7_conversion_map in <linux/map_to_7segment.h>.
+
+ Only visible on displays with 7-segment capability.
+
+ This attribute is not human-readable. Writes must match the
+ struct size exactly, else -EINVAL is returned; reads return the
+ entire mapping as a binary blob.
+
+ This interface and its implementation match existing conventions
+ used in segment-mapped display drivers since 2005.
+
+ ABI note: This style of binary sysfs attribute *is an exception*
+ to current "one value per file, text only" sysfs rules, for
+ historical compatibility and driver uniformity. New drivers are
+ discouraged from introducing additional binary sysfs ABIs.
+
+ Reference interface guidance:
+ - include/uapi/linux/map_to_7segment.h
+
+What: /sys/.../map_seg14
+Date: January 2024
+KernelVersion: 6.9
+Description:
+ Read/write binary blob representing the ASCII-to-14-segment
+ display conversion table used by the linedisp driver, as defined
+ by struct seg14_conversion_map in <linux/map_to_14segment.h>.
+
+ Only visible on displays with 14-segment capability.
+
+ This attribute is not human-readable. Writes must match the
+ struct size exactly, else -EINVAL is returned; reads return the
+ entire mapping as a binary blob.
+
+ This interface and its implementation match existing conventions
+ used by segment-mapped display drivers since 2005.
+
+ ABI note: This style of binary sysfs attribute *is an exception*
+ to current "one value per file, text only" sysfs rules, for
+ historical compatibility and driver uniformity. New drivers are
+ discouraged from introducing additional binary sysfs ABIs.
+
+ Reference interface guidance:
+ - include/uapi/linux/map_to_14segment.h
diff --git a/Documentation/ABI/testing/sysfs-block-bcache b/Documentation/ABI/testing/sysfs-block-bcache
index 9e4bbc5d51fd..9344a657ca70 100644
--- a/Documentation/ABI/testing/sysfs-block-bcache
+++ b/Documentation/ABI/testing/sysfs-block-bcache
@@ -106,13 +106,6 @@ Description:
will be discarded from the cache. Should not be turned off with
writeback caching enabled.
-What: /sys/block/<disk>/bcache/discard
-Date: November 2010
-Contact: Kent Overstreet <kent.overstreet@gmail.com>
-Description:
- For a cache, a boolean allowing discard/TRIM to be turned off
- or back on if the device supports it.
-
What: /sys/block/<disk>/bcache/bucket_size
Date: November 2010
Contact: Kent Overstreet <kent.overstreet@gmail.com>
diff --git a/Documentation/ABI/testing/sysfs-block-zram b/Documentation/ABI/testing/sysfs-block-zram
index 628a00fb20a9..36c57de0a10a 100644
--- a/Documentation/ABI/testing/sysfs-block-zram
+++ b/Documentation/ABI/testing/sysfs-block-zram
@@ -22,14 +22,6 @@ Description:
device. The reset operation frees all the memory associated
with this device.
-What: /sys/block/zram<id>/max_comp_streams
-Date: February 2014
-Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
-Description:
- The max_comp_streams file is read-write and specifies the
- number of backend's zcomp_strm compression streams (number of
- concurrent compress operations).
-
What: /sys/block/zram<id>/comp_algorithm
Date: February 2014
Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
@@ -151,3 +143,10 @@ Contact: Sergey Senozhatsky <senozhatsky@chromium.org>
Description:
The recompress file is write-only and triggers re-compression
with secondary compression algorithms.
+
+What: /sys/block/zram<id>/algorithm_params
+Date: August 2024
+Contact: Sergey Senozhatsky <senozhatsky@chromium.org>
+Description:
+ The algorithm_params file is write-only and is used to setup
+ compression algorithm parameters.
diff --git a/Documentation/ABI/testing/sysfs-bus-acpi b/Documentation/ABI/testing/sysfs-bus-acpi
index 58abacf59b2a..6f2b907a8013 100644
--- a/Documentation/ABI/testing/sysfs-bus-acpi
+++ b/Documentation/ABI/testing/sysfs-bus-acpi
@@ -1,6 +1,6 @@
What: /sys/bus/acpi/devices/.../path
Date: December 2006
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
This attribute indicates the full path of ACPI namespace
object associated with the device object. For example,
@@ -12,7 +12,7 @@ Description:
What: /sys/bus/acpi/devices/.../modalias
Date: July 2007
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
This attribute indicates the PNP IDs of the device object.
That is acpi:HHHHHHHH:[CCCCCCC:]. Where each HHHHHHHH or
@@ -20,7 +20,7 @@ Description:
What: /sys/bus/acpi/devices/.../hid
Date: April 2005
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
This attribute indicates the hardware ID (_HID) of the
device object. For example, PNP0103.
@@ -29,14 +29,14 @@ Description:
What: /sys/bus/acpi/devices/.../description
Date: October 2012
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
This attribute contains the output of the device object's
_STR control method, if present.
What: /sys/bus/acpi/devices/.../adr
Date: October 2012
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
This attribute contains the output of the device object's
_ADR control method, which is present for ACPI device
@@ -45,14 +45,14 @@ Description:
What: /sys/bus/acpi/devices/.../uid
Date: October 2012
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
This attribute contains the output of the device object's
_UID control method, if present.
What: /sys/bus/acpi/devices/.../eject
Date: December 2006
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
Writing 1 to this attribute will trigger hot removal of
this device object. This file exists for every device
@@ -60,7 +60,7 @@ Description:
What: /sys/bus/acpi/devices/.../status
Date: Jan, 2014
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
(RO) Returns the ACPI device status: enabled, disabled or
functioning or present, if the method _STA is present.
@@ -90,7 +90,7 @@ Description:
What: /sys/bus/acpi/devices/.../hrv
Date: Apr, 2016
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
(RO) Allows users to read the hardware version of non-PCI
hardware, if the _HRV control method is present. It is mostly
diff --git a/Documentation/ABI/testing/sysfs-bus-auxiliary b/Documentation/ABI/testing/sysfs-bus-auxiliary
new file mode 100644
index 000000000000..cc856079690f
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-auxiliary
@@ -0,0 +1,9 @@
+What: /sys/bus/auxiliary/devices/.../irqs/
+Date: April, 2024
+Contact: Shay Drory <shayd@nvidia.com>
+Description:
+ The /sys/devices/.../irqs directory contains a variable set of
+ files, with each file is named as irq number similar to PCI PF
+ or VF's irq number located in msi_irqs directory.
+ These irq files are added and removed dynamically when an IRQ
+ is requested and freed respectively for the PCI SF.
diff --git a/Documentation/ABI/testing/sysfs-bus-coresight-devices-cti b/Documentation/ABI/testing/sysfs-bus-coresight-devices-cti
index bf2869c413e7..a2aef7f5a6d7 100644
--- a/Documentation/ABI/testing/sysfs-bus-coresight-devices-cti
+++ b/Documentation/ABI/testing/sysfs-bus-coresight-devices-cti
@@ -1,241 +1,247 @@
What: /sys/bus/coresight/devices/<cti-name>/enable
Date: March 2020
-KernelVersion 5.7
+KernelVersion: 5.7
Contact: Mike Leach or Mathieu Poirier
Description: (RW) Enable/Disable the CTI hardware.
What: /sys/bus/coresight/devices/<cti-name>/powered
Date: March 2020
-KernelVersion 5.7
+KernelVersion: 5.7
Contact: Mike Leach or Mathieu Poirier
Description: (Read) Indicate if the CTI hardware is powered.
What: /sys/bus/coresight/devices/<cti-name>/ctmid
Date: March 2020
-KernelVersion 5.7
+KernelVersion: 5.7
Contact: Mike Leach or Mathieu Poirier
Description: (Read) Display the associated CTM ID
What: /sys/bus/coresight/devices/<cti-name>/nr_trigger_cons
Date: March 2020
-KernelVersion 5.7
+KernelVersion: 5.7
Contact: Mike Leach or Mathieu Poirier
Description: (Read) Number of devices connected to triggers on this CTI
What: /sys/bus/coresight/devices/<cti-name>/triggers<N>/name
Date: March 2020
-KernelVersion 5.7
+KernelVersion: 5.7
Contact: Mike Leach or Mathieu Poirier
Description: (Read) Name of connected device <N>
What: /sys/bus/coresight/devices/<cti-name>/triggers<N>/in_signals
Date: March 2020
-KernelVersion 5.7
+KernelVersion: 5.7
Contact: Mike Leach or Mathieu Poirier
Description: (Read) Input trigger signals from connected device <N>
What: /sys/bus/coresight/devices/<cti-name>/triggers<N>/in_types
Date: March 2020
-KernelVersion 5.7
+KernelVersion: 5.7
Contact: Mike Leach or Mathieu Poirier
Description: (Read) Functional types for the input trigger signals
from connected device <N>
What: /sys/bus/coresight/devices/<cti-name>/triggers<N>/out_signals
Date: March 2020
-KernelVersion 5.7
+KernelVersion: 5.7
Contact: Mike Leach or Mathieu Poirier
Description: (Read) Output trigger signals to connected device <N>
What: /sys/bus/coresight/devices/<cti-name>/triggers<N>/out_types
Date: March 2020
-KernelVersion 5.7
+KernelVersion: 5.7
Contact: Mike Leach or Mathieu Poirier
Description: (Read) Functional types for the output trigger signals
to connected device <N>
What: /sys/bus/coresight/devices/<cti-name>/regs/inout_sel
Date: March 2020
-KernelVersion 5.7
+KernelVersion: 5.7
Contact: Mike Leach or Mathieu Poirier
Description: (RW) Select the index for inen and outen registers.
What: /sys/bus/coresight/devices/<cti-name>/regs/inen
Date: March 2020
-KernelVersion 5.7
+KernelVersion: 5.7
Contact: Mike Leach or Mathieu Poirier
Description: (RW) Read or write the CTIINEN register selected by inout_sel.
What: /sys/bus/coresight/devices/<cti-name>/regs/outen
Date: March 2020
-KernelVersion 5.7
+KernelVersion: 5.7
Contact: Mike Leach or Mathieu Poirier
Description: (RW) Read or write the CTIOUTEN register selected by inout_sel.
What: /sys/bus/coresight/devices/<cti-name>/regs/gate
Date: March 2020
-KernelVersion 5.7
+KernelVersion: 5.7
Contact: Mike Leach or Mathieu Poirier
Description: (RW) Read or write CTIGATE register.
What: /sys/bus/coresight/devices/<cti-name>/regs/asicctl
Date: March 2020
-KernelVersion 5.7
+KernelVersion: 5.7
Contact: Mike Leach or Mathieu Poirier
Description: (RW) Read or write ASICCTL register.
What: /sys/bus/coresight/devices/<cti-name>/regs/intack
Date: March 2020
-KernelVersion 5.7
+KernelVersion: 5.7
Contact: Mike Leach or Mathieu Poirier
Description: (Write) Write the INTACK register.
What: /sys/bus/coresight/devices/<cti-name>/regs/appset
Date: March 2020
-KernelVersion 5.7
+KernelVersion: 5.7
Contact: Mike Leach or Mathieu Poirier
Description: (RW) Set CTIAPPSET register to activate channel. Read back to
determine current value of register.
What: /sys/bus/coresight/devices/<cti-name>/regs/appclear
Date: March 2020
-KernelVersion 5.7
+KernelVersion: 5.7
Contact: Mike Leach or Mathieu Poirier
Description: (Write) Write APPCLEAR register to deactivate channel.
What: /sys/bus/coresight/devices/<cti-name>/regs/apppulse
Date: March 2020
-KernelVersion 5.7
+KernelVersion: 5.7
Contact: Mike Leach or Mathieu Poirier
Description: (Write) Write APPPULSE to pulse a channel active for one clock
cycle.
What: /sys/bus/coresight/devices/<cti-name>/regs/chinstatus
Date: March 2020
-KernelVersion 5.7
+KernelVersion: 5.7
Contact: Mike Leach or Mathieu Poirier
Description: (Read) Read current status of channel inputs.
What: /sys/bus/coresight/devices/<cti-name>/regs/choutstatus
Date: March 2020
-KernelVersion 5.7
+KernelVersion: 5.7
Contact: Mike Leach or Mathieu Poirier
Description: (Read) read current status of channel outputs.
What: /sys/bus/coresight/devices/<cti-name>/regs/triginstatus
Date: March 2020
-KernelVersion 5.7
+KernelVersion: 5.7
Contact: Mike Leach or Mathieu Poirier
Description: (Read) read current status of input trigger signals
What: /sys/bus/coresight/devices/<cti-name>/regs/trigoutstatus
Date: March 2020
-KernelVersion 5.7
+KernelVersion: 5.7
Contact: Mike Leach or Mathieu Poirier
Description: (Read) read current status of output trigger signals.
What: /sys/bus/coresight/devices/<cti-name>/channels/trigin_attach
Date: March 2020
-KernelVersion 5.7
+KernelVersion: 5.7
Contact: Mike Leach or Mathieu Poirier
Description: (Write) Attach a CTI input trigger to a CTM channel.
What: /sys/bus/coresight/devices/<cti-name>/channels/trigin_detach
Date: March 2020
-KernelVersion 5.7
+KernelVersion: 5.7
Contact: Mike Leach or Mathieu Poirier
Description: (Write) Detach a CTI input trigger from a CTM channel.
What: /sys/bus/coresight/devices/<cti-name>/channels/trigout_attach
Date: March 2020
-KernelVersion 5.7
+KernelVersion: 5.7
Contact: Mike Leach or Mathieu Poirier
Description: (Write) Attach a CTI output trigger to a CTM channel.
What: /sys/bus/coresight/devices/<cti-name>/channels/trigout_detach
Date: March 2020
-KernelVersion 5.7
+KernelVersion: 5.7
Contact: Mike Leach or Mathieu Poirier
Description: (Write) Detach a CTI output trigger from a CTM channel.
What: /sys/bus/coresight/devices/<cti-name>/channels/chan_gate_enable
Date: March 2020
-KernelVersion 5.7
+KernelVersion: 5.7
Contact: Mike Leach or Mathieu Poirier
Description: (RW) Enable CTIGATE for single channel (Write) or list enabled
channels through the gate (R).
What: /sys/bus/coresight/devices/<cti-name>/channels/chan_gate_disable
Date: March 2020
-KernelVersion 5.7
+KernelVersion: 5.7
Contact: Mike Leach or Mathieu Poirier
Description: (Write) Disable CTIGATE for single channel.
What: /sys/bus/coresight/devices/<cti-name>/channels/chan_set
Date: March 2020
-KernelVersion 5.7
+KernelVersion: 5.7
Contact: Mike Leach or Mathieu Poirier
Description: (Write) Activate a single channel.
What: /sys/bus/coresight/devices/<cti-name>/channels/chan_clear
Date: March 2020
-KernelVersion 5.7
+KernelVersion: 5.7
Contact: Mike Leach or Mathieu Poirier
Description: (Write) Deactivate a single channel.
What: /sys/bus/coresight/devices/<cti-name>/channels/chan_pulse
Date: March 2020
-KernelVersion 5.7
+KernelVersion: 5.7
Contact: Mike Leach or Mathieu Poirier
Description: (Write) Pulse a single channel - activate for a single clock cycle.
What: /sys/bus/coresight/devices/<cti-name>/channels/trigout_filtered
Date: March 2020
-KernelVersion 5.7
+KernelVersion: 5.7
Contact: Mike Leach or Mathieu Poirier
Description: (Read) List of output triggers filtered across all connections.
What: /sys/bus/coresight/devices/<cti-name>/channels/trig_filter_enable
Date: March 2020
-KernelVersion 5.7
+KernelVersion: 5.7
Contact: Mike Leach or Mathieu Poirier
Description: (RW) Enable or disable trigger output signal filtering.
What: /sys/bus/coresight/devices/<cti-name>/channels/chan_inuse
Date: March 2020
-KernelVersion 5.7
+KernelVersion: 5.7
Contact: Mike Leach or Mathieu Poirier
Description: (Read) show channels with at least one attached trigger signal.
What: /sys/bus/coresight/devices/<cti-name>/channels/chan_free
Date: March 2020
-KernelVersion 5.7
+KernelVersion: 5.7
Contact: Mike Leach or Mathieu Poirier
Description: (Read) show channels with no attached trigger signals.
What: /sys/bus/coresight/devices/<cti-name>/channels/chan_xtrigs_sel
Date: March 2020
-KernelVersion 5.7
+KernelVersion: 5.7
Contact: Mike Leach or Mathieu Poirier
Description: (RW) Write channel number to select a channel to view, read to
see selected channel number.
What: /sys/bus/coresight/devices/<cti-name>/channels/chan_xtrigs_in
Date: March 2020
-KernelVersion 5.7
+KernelVersion: 5.7
Contact: Mike Leach or Mathieu Poirier
Description: (Read) Read to see input triggers connected to selected view
channel.
What: /sys/bus/coresight/devices/<cti-name>/channels/chan_xtrigs_out
Date: March 2020
-KernelVersion 5.7
+KernelVersion: 5.7
Contact: Mike Leach or Mathieu Poirier
Description: (Read) Read to see output triggers connected to selected view
channel.
What: /sys/bus/coresight/devices/<cti-name>/channels/chan_xtrigs_reset
Date: March 2020
-KernelVersion 5.7
+KernelVersion: 5.7
Contact: Mike Leach or Mathieu Poirier
Description: (Write) Clear all channel / trigger programming.
+
+What: /sys/bus/coresight/devices/<cti-name>/label
+Date: Aug 2025
+KernelVersion 6.18
+Contact: Mao Jinlong <quic_jinlmao@quicinc.com>
+Description: (Read) Show hardware context information of device.
diff --git a/Documentation/ABI/testing/sysfs-bus-coresight-devices-dummy-source b/Documentation/ABI/testing/sysfs-bus-coresight-devices-dummy-source
new file mode 100644
index 000000000000..321e3ee1fc9d
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-coresight-devices-dummy-source
@@ -0,0 +1,21 @@
+What: /sys/bus/coresight/devices/dummy_source<N>/enable_source
+Date: Dec 2024
+KernelVersion: 6.14
+Contact: Mao Jinlong <quic_jinlmao@quicinc.com>
+Description: (RW) Enable/disable tracing of dummy source. A sink should be activated
+ before enabling the source. The path of coresight components linking
+ the source to the sink is configured and managed automatically by the
+ coresight framework.
+
+What: /sys/bus/coresight/devices/dummy_source<N>/traceid
+Date: Dec 2024
+KernelVersion: 6.14
+Contact: Mao Jinlong <quic_jinlmao@quicinc.com>
+Description: (R) Show the trace ID that will appear in the trace stream
+ coming from this trace entity.
+
+What: /sys/bus/coresight/devices/dummy_source<N>/label
+Date: Aug 2025
+KernelVersion 6.18
+Contact: Mao Jinlong <quic_jinlmao@quicinc.com>
+Description: (Read) Show hardware context information of device.
diff --git a/Documentation/ABI/testing/sysfs-bus-coresight-devices-etb10 b/Documentation/ABI/testing/sysfs-bus-coresight-devices-etb10
index 9a383f6a74eb..f30526949687 100644
--- a/Documentation/ABI/testing/sysfs-bus-coresight-devices-etb10
+++ b/Documentation/ABI/testing/sysfs-bus-coresight-devices-etb10
@@ -19,6 +19,12 @@ Description: (RW) Disables write access to the Trace RAM by stopping the
into the Trace RAM following the trigger event is equal to the
value stored in this register+1 (from ARM ETB-TRM).
+What: /sys/bus/coresight/devices/<memory_map>.etb/label
+Date: Aug 2025
+KernelVersion 6.18
+Contact: Mao Jinlong <quic_jinlmao@quicinc.com>
+Description: (Read) Show hardware context information of device.
+
What: /sys/bus/coresight/devices/<memory_map>.etb/mgmt/rdp
Date: March 2016
KernelVersion: 4.7
diff --git a/Documentation/ABI/testing/sysfs-bus-coresight-devices-etm3x b/Documentation/ABI/testing/sysfs-bus-coresight-devices-etm3x
index 3acf7fc31659..245c322c91f1 100644
--- a/Documentation/ABI/testing/sysfs-bus-coresight-devices-etm3x
+++ b/Documentation/ABI/testing/sysfs-bus-coresight-devices-etm3x
@@ -22,7 +22,7 @@ Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
Description: (RW) Used in conjunction with @addr_idx. Specifies
characteristics about the address comparator being configure,
for example the access type, the kind of instruction to trace,
- processor contect ID to trigger on, etc. Individual fields in
+ processor context ID to trigger on, etc. Individual fields in
the access type register may vary on the version of the trace
entity.
@@ -251,6 +251,12 @@ KernelVersion: 4.4
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
Description: (RO) Holds the cpu number this tracer is affined to.
+What: /sys/bus/coresight/devices/<memory_map>.[etm|ptm]/label
+Date: Aug 2025
+KernelVersion 6.18
+Contact: Mao Jinlong <quic_jinlmao@quicinc.com>
+Description: (Read) Show hardware context information of device.
+
What: /sys/bus/coresight/devices/<memory_map>.[etm|ptm]/mgmt/etmccr
Date: September 2015
KernelVersion: 4.4
diff --git a/Documentation/ABI/testing/sysfs-bus-coresight-devices-etm4x b/Documentation/ABI/testing/sysfs-bus-coresight-devices-etm4x
index a0425d70d009..6f19a6a5f2e1 100644
--- a/Documentation/ABI/testing/sysfs-bus-coresight-devices-etm4x
+++ b/Documentation/ABI/testing/sysfs-bus-coresight-devices-etm4x
@@ -329,6 +329,12 @@ Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
Description: (RW) Access the selected single show PE comparator control
register.
+What: /sys/bus/coresight/devices/etm<N>/label
+Date: Aug 2025
+KernelVersion 6.18
+Contact: Mao Jinlong <quic_jinlmao@quicinc.com>
+Description: (Read) Show hardware context information of device.
+
What: /sys/bus/coresight/devices/etm<N>/mgmt/trcoslsr
Date: April 2015
KernelVersion: 4.01
diff --git a/Documentation/ABI/testing/sysfs-bus-coresight-devices-funnel b/Documentation/ABI/testing/sysfs-bus-coresight-devices-funnel
index d75acda5e1b3..86938e9bbcde 100644
--- a/Documentation/ABI/testing/sysfs-bus-coresight-devices-funnel
+++ b/Documentation/ABI/testing/sysfs-bus-coresight-devices-funnel
@@ -10,3 +10,9 @@ Date: November 2014
KernelVersion: 3.19
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
Description: (RW) Defines input port priority order.
+
+What: /sys/bus/coresight/devices/<memory_map>.funnel/label
+Date: Aug 2025
+KernelVersion 6.18
+Contact: Mao Jinlong <quic_jinlmao@quicinc.com>
+Description: (Read) Show hardware context information of device.
diff --git a/Documentation/ABI/testing/sysfs-bus-coresight-devices-stm b/Documentation/ABI/testing/sysfs-bus-coresight-devices-stm
index 53e1f4815d64..848e2ffc1480 100644
--- a/Documentation/ABI/testing/sysfs-bus-coresight-devices-stm
+++ b/Documentation/ABI/testing/sysfs-bus-coresight-devices-stm
@@ -51,3 +51,9 @@ KernelVersion: 4.7
Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
Description: (RW) Holds the trace ID that will appear in the trace stream
coming from this trace entity.
+
+What: /sys/bus/coresight/devices/<memory_map>.stm/label
+Date: Aug 2025
+KernelVersion 6.18
+Contact: Mao Jinlong <quic_jinlmao@quicinc.com>
+Description: (Read) Show hardware context information of device.
diff --git a/Documentation/ABI/testing/sysfs-bus-coresight-devices-tmc b/Documentation/ABI/testing/sysfs-bus-coresight-devices-tmc
index 96aafa66b4a5..55e298b9c4a4 100644
--- a/Documentation/ABI/testing/sysfs-bus-coresight-devices-tmc
+++ b/Documentation/ABI/testing/sysfs-bus-coresight-devices-tmc
@@ -97,7 +97,7 @@ Date: August 2023
KernelVersion: 6.7
Contact: Anshuman Khandual <anshuman.khandual@arm.com>
Description: (Read) Shows all supported Coresight TMC-ETR buffer modes available
- for the users to configure explicitly. This file is avaialble only
+ for the users to configure explicitly. This file is available only
for TMC ETR devices.
What: /sys/bus/coresight/devices/<memory_map>.tmc/buf_mode_preferred
@@ -107,3 +107,9 @@ Contact: Anshuman Khandual <anshuman.khandual@arm.com>
Description: (RW) Current Coresight TMC-ETR buffer mode selected. But user could
only provide a mode which is supported for a given ETR device. This
file is available only for TMC ETR devices.
+
+What: /sys/bus/coresight/devices/<memory_map>.tmc/label
+Date: Aug 2025
+KernelVersion 6.18
+Contact: Mao Jinlong <quic_jinlmao@quicinc.com>
+Description: (Read) Show hardware context information of device.
diff --git a/Documentation/ABI/testing/sysfs-bus-coresight-devices-tpdm b/Documentation/ABI/testing/sysfs-bus-coresight-devices-tpdm
index 4dd49b159543..98f1c6545027 100644
--- a/Documentation/ABI/testing/sysfs-bus-coresight-devices-tpdm
+++ b/Documentation/ABI/testing/sysfs-bus-coresight-devices-tpdm
@@ -1,6 +1,6 @@
What: /sys/bus/coresight/devices/<tpdm-name>/integration_test
Date: January 2023
-KernelVersion 6.2
+KernelVersion: 6.2
Contact: Jinlong Mao (QUIC) <quic_jinlmao@quicinc.com>, Tao Zhang (QUIC) <quic_taozha@quicinc.com>
Description:
(Write) Run integration test for tpdm. Integration test
@@ -14,7 +14,7 @@ Description:
What: /sys/bus/coresight/devices/<tpdm-name>/reset_dataset
Date: March 2023
-KernelVersion 6.7
+KernelVersion: 6.7
Contact: Jinlong Mao (QUIC) <quic_jinlmao@quicinc.com>, Tao Zhang (QUIC) <quic_taozha@quicinc.com>
Description:
(Write) Reset the dataset of the tpdm.
@@ -24,7 +24,7 @@ Description:
What: /sys/bus/coresight/devices/<tpdm-name>/dsb_trig_type
Date: March 2023
-KernelVersion 6.7
+KernelVersion: 6.7
Contact: Jinlong Mao (QUIC) <quic_jinlmao@quicinc.com>, Tao Zhang (QUIC) <quic_taozha@quicinc.com>
Description:
(RW) Set/Get the trigger type of the DSB for tpdm.
@@ -35,7 +35,7 @@ Description:
What: /sys/bus/coresight/devices/<tpdm-name>/dsb_trig_ts
Date: March 2023
-KernelVersion 6.7
+KernelVersion: 6.7
Contact: Jinlong Mao (QUIC) <quic_jinlmao@quicinc.com>, Tao Zhang (QUIC) <quic_taozha@quicinc.com>
Description:
(RW) Set/Get the trigger timestamp of the DSB for tpdm.
@@ -46,7 +46,7 @@ Description:
What: /sys/bus/coresight/devices/<tpdm-name>/dsb_mode
Date: March 2023
-KernelVersion 6.7
+KernelVersion: 6.7
Contact: Jinlong Mao (QUIC) <quic_jinlmao@quicinc.com>, Tao Zhang (QUIC) <quic_taozha@quicinc.com>
Description:
(RW) Set/Get the programming mode of the DSB for tpdm.
@@ -60,7 +60,7 @@ Description:
What: /sys/bus/coresight/devices/<tpdm-name>/dsb_edge/ctrl_idx
Date: March 2023
-KernelVersion 6.7
+KernelVersion: 6.7
Contact: Jinlong Mao (QUIC) <quic_jinlmao@quicinc.com>, Tao Zhang (QUIC) <quic_taozha@quicinc.com>
Description:
(RW) Set/Get the index number of the edge detection for the DSB
@@ -69,7 +69,7 @@ Description:
What: /sys/bus/coresight/devices/<tpdm-name>/dsb_edge/ctrl_val
Date: March 2023
-KernelVersion 6.7
+KernelVersion: 6.7
Contact: Jinlong Mao (QUIC) <quic_jinlmao@quicinc.com>, Tao Zhang (QUIC) <quic_taozha@quicinc.com>
Description:
Write a data to control the edge detection corresponding to
@@ -85,7 +85,7 @@ Description:
What: /sys/bus/coresight/devices/<tpdm-name>/dsb_edge/ctrl_mask
Date: March 2023
-KernelVersion 6.7
+KernelVersion: 6.7
Contact: Jinlong Mao (QUIC) <quic_jinlmao@quicinc.com>, Tao Zhang (QUIC) <quic_taozha@quicinc.com>
Description:
Write a data to mask the edge detection corresponding to the index
@@ -97,21 +97,21 @@ Description:
What: /sys/bus/coresight/devices/<tpdm-name>/dsb_edge/edcr[0:15]
Date: March 2023
-KernelVersion 6.7
+KernelVersion: 6.7
Contact: Jinlong Mao (QUIC) <quic_jinlmao@quicinc.com>, Tao Zhang (QUIC) <quic_taozha@quicinc.com>
Description:
Read a set of the edge control value of the DSB in TPDM.
What: /sys/bus/coresight/devices/<tpdm-name>/dsb_edge/edcmr[0:7]
Date: March 2023
-KernelVersion 6.7
+KernelVersion: 6.7
Contact: Jinlong Mao (QUIC) <quic_jinlmao@quicinc.com>, Tao Zhang (QUIC) <quic_taozha@quicinc.com>
Description:
Read a set of the edge control mask of the DSB in TPDM.
What: /sys/bus/coresight/devices/<tpdm-name>/dsb_trig_patt/xpr[0:7]
Date: March 2023
-KernelVersion 6.7
+KernelVersion: 6.7
Contact: Jinlong Mao (QUIC) <quic_jinlmao@quicinc.com>, Tao Zhang (QUIC) <quic_taozha@quicinc.com>
Description:
(RW) Set/Get the value of the trigger pattern for the DSB
@@ -119,7 +119,7 @@ Description:
What: /sys/bus/coresight/devices/<tpdm-name>/dsb_trig_patt/xpmr[0:7]
Date: March 2023
-KernelVersion 6.7
+KernelVersion: 6.7
Contact: Jinlong Mao (QUIC) <quic_jinlmao@quicinc.com>, Tao Zhang (QUIC) <quic_taozha@quicinc.com>
Description:
(RW) Set/Get the mask of the trigger pattern for the DSB
@@ -127,21 +127,21 @@ Description:
What: /sys/bus/coresight/devices/<tpdm-name>/dsb_patt/tpr[0:7]
Date: March 2023
-KernelVersion 6.7
+KernelVersion: 6.7
Contact: Jinlong Mao (QUIC) <quic_jinlmao@quicinc.com>, Tao Zhang (QUIC) <quic_taozha@quicinc.com>
Description:
(RW) Set/Get the value of the pattern for the DSB subunit TPDM.
What: /sys/bus/coresight/devices/<tpdm-name>/dsb_patt/tpmr[0:7]
Date: March 2023
-KernelVersion 6.7
+KernelVersion: 6.7
Contact: Jinlong Mao (QUIC) <quic_jinlmao@quicinc.com>, Tao Zhang (QUIC) <quic_taozha@quicinc.com>
Description:
(RW) Set/Get the mask of the pattern for the DSB subunit TPDM.
What: /sys/bus/coresight/devices/<tpdm-name>/dsb_patt/enable_ts
Date: March 2023
-KernelVersion 6.7
+KernelVersion: 6.7
Contact: Jinlong Mao (QUIC) <quic_jinlmao@quicinc.com>, Tao Zhang (QUIC) <quic_taozha@quicinc.com>
Description:
(Write) Set the pattern timestamp of DSB tpdm. Read
@@ -153,7 +153,7 @@ Description:
What: /sys/bus/coresight/devices/<tpdm-name>/dsb_patt/set_type
Date: March 2023
-KernelVersion 6.7
+KernelVersion: 6.7
Contact: Jinlong Mao (QUIC) <quic_jinlmao@quicinc.com>, Tao Zhang (QUIC) <quic_taozha@quicinc.com>
Description:
(Write) Set the pattern type of DSB tpdm. Read
@@ -165,8 +165,116 @@ Description:
What: /sys/bus/coresight/devices/<tpdm-name>/dsb_msr/msr[0:31]
Date: March 2023
-KernelVersion 6.7
+KernelVersion: 6.7
Contact: Jinlong Mao (QUIC) <quic_jinlmao@quicinc.com>, Tao Zhang (QUIC) <quic_taozha@quicinc.com>
Description:
(RW) Set/Get the MSR(mux select register) for the DSB subunit
TPDM.
+
+What: /sys/bus/coresight/devices/<tpdm-name>/cmb_mode
+Date: January 2024
+KernelVersion: 6.9
+Contact: Jinlong Mao (QUIC) <quic_jinlmao@quicinc.com>, Tao Zhang (QUIC) <quic_taozha@quicinc.com>
+Description: (Write) Set the data collection mode of CMB tpdm. Continuous
+ change creates CMB data set elements on every CMBCLK edge.
+ Trace-on-change creates CMB data set elements only when a new
+ data set element differs in value from the previous element
+ in a CMB data set.
+
+ Accepts only one of the 2 values - 0 or 1.
+ 0 : Continuous CMB collection mode.
+ 1 : Trace-on-change CMB collection mode.
+
+What: /sys/bus/coresight/devices/<tpdm-name>/cmb_trig_patt/xpr[0:1]
+Date: January 2024
+KernelVersion: 6.9
+Contact: Jinlong Mao (QUIC) <quic_jinlmao@quicinc.com>, Tao Zhang (QUIC) <quic_taozha@quicinc.com>
+Description:
+ (RW) Set/Get the value of the trigger pattern for the CMB
+ subunit TPDM.
+
+What: /sys/bus/coresight/devices/<tpdm-name>/cmb_trig_patt/xpmr[0:1]
+Date: January 2024
+KernelVersion: 6.9
+Contact: Jinlong Mao (QUIC) <quic_jinlmao@quicinc.com>, Tao Zhang (QUIC) <quic_taozha@quicinc.com>
+Description:
+ (RW) Set/Get the mask of the trigger pattern for the CMB
+ subunit TPDM.
+
+What: /sys/bus/coresight/devices/<tpdm-name>/dsb_patt/tpr[0:1]
+Date: January 2024
+KernelVersion: 6.9
+Contact: Jinlong Mao (QUIC) <quic_jinlmao@quicinc.com>, Tao Zhang (QUIC) <quic_taozha@quicinc.com>
+Description:
+ (RW) Set/Get the value of the pattern for the CMB subunit TPDM.
+
+What: /sys/bus/coresight/devices/<tpdm-name>/dsb_patt/tpmr[0:1]
+Date: January 2024
+KernelVersion: 6.9
+Contact: Jinlong Mao (QUIC) <quic_jinlmao@quicinc.com>, Tao Zhang (QUIC) <quic_taozha@quicinc.com>
+Description:
+ (RW) Set/Get the mask of the pattern for the CMB subunit TPDM.
+
+What: /sys/bus/coresight/devices/<tpdm-name>/cmb_patt/enable_ts
+Date: January 2024
+KernelVersion: 6.9
+Contact: Jinlong Mao (QUIC) <quic_jinlmao@quicinc.com>, Tao Zhang (QUIC) <quic_taozha@quicinc.com>
+Description:
+ (Write) Set the pattern timestamp of CMB tpdm. Read
+ the pattern timestamp of CMB tpdm.
+
+ Accepts only one of the 2 values - 0 or 1.
+ 0 : Disable CMB pattern timestamp.
+ 1 : Enable CMB pattern timestamp.
+
+What: /sys/bus/coresight/devices/<tpdm-name>/cmb_trig_ts
+Date: January 2024
+KernelVersion: 6.9
+Contact: Jinlong Mao (QUIC) <quic_jinlmao@quicinc.com>, Tao Zhang (QUIC) <quic_taozha@quicinc.com>
+Description:
+ (RW) Set/Get the trigger timestamp of the CMB for tpdm.
+
+ Accepts only one of the 2 values - 0 or 1.
+ 0 : Set the CMB trigger type to false
+ 1 : Set the CMB trigger type to true
+
+What: /sys/bus/coresight/devices/<tpdm-name>/cmb_ts_all
+Date: January 2024
+KernelVersion: 6.9
+Contact: Jinlong Mao (QUIC) <quic_jinlmao@quicinc.com>, Tao Zhang (QUIC) <quic_taozha@quicinc.com>
+Description:
+ (RW) Read or write the status of timestamp upon all interface.
+ Only value 0 and 1 can be written to this node. Set this node to 1 to request
+ timestamp to all trace packet.
+ Accepts only one of the 2 values - 0 or 1.
+ 0 : Disable the timestamp of all trace packets.
+ 1 : Enable the timestamp of all trace packets.
+
+What: /sys/bus/coresight/devices/<tpdm-name>/cmb_msr/msr[0:31]
+Date: January 2024
+KernelVersion: 6.9
+Contact: Jinlong Mao (QUIC) <quic_jinlmao@quicinc.com>, Tao Zhang (QUIC) <quic_taozha@quicinc.com>
+Description:
+ (RW) Set/Get the MSR(mux select register) for the CMB subunit
+ TPDM.
+
+What: /sys/bus/coresight/devices/<tpdm-name>/mcmb_trig_lane
+Date: Feb 2025
+KernelVersion 6.15
+Contact: Jinlong Mao (QUIC) <quic_jinlmao@quicinc.com>, Tao Zhang (QUIC) <quic_taozha@quicinc.com>
+Description:
+ (RW) Set/Get which lane participates in the output pattern
+ match cross trigger mechanism for the MCMB subunit TPDM.
+
+What: /sys/bus/coresight/devices/<tpdm-name>/mcmb_lanes_select
+Date: Feb 2025
+KernelVersion 6.15
+Contact: Jinlong Mao (QUIC) <quic_jinlmao@quicinc.com>, Tao Zhang (QUIC) <quic_taozha@quicinc.com>
+Description:
+ (RW) Set/Get the enablement of the individual lane.
+
+What: /sys/bus/coresight/devices/<tpdm-name>/label
+Date: Aug 2025
+KernelVersion 6.18
+Contact: Mao Jinlong <quic_jinlmao@quicinc.com>
+Description: (Read) Show hardware context information of device.
diff --git a/Documentation/ABI/testing/sysfs-bus-coresight-devices-trbe b/Documentation/ABI/testing/sysfs-bus-coresight-devices-trbe
index ad3bbc6fa751..8a4b749ed26e 100644
--- a/Documentation/ABI/testing/sysfs-bus-coresight-devices-trbe
+++ b/Documentation/ABI/testing/sysfs-bus-coresight-devices-trbe
@@ -12,3 +12,9 @@ Contact: Anshuman Khandual <anshuman.khandual@arm.com>
Description: (Read) Shows if TRBE updates in the memory are with access
and dirty flag updates as well. This value is fetched from
the TRBIDR register.
+
+What: /sys/bus/coresight/devices/trbe<cpu>/label
+Date: Aug 2025
+KernelVersion 6.18
+Contact: Mao Jinlong <quic_jinlmao@quicinc.com>
+Description: (Read) Show hardware context information of device.
diff --git a/Documentation/ABI/testing/sysfs-bus-counter b/Documentation/ABI/testing/sysfs-bus-counter
index 73ac84c0bca7..3e7eddd8aff3 100644
--- a/Documentation/ABI/testing/sysfs-bus-counter
+++ b/Documentation/ABI/testing/sysfs-bus-counter
@@ -34,6 +34,14 @@ Contact: linux-iio@vger.kernel.org
Description:
Count data of Count Y represented as a string.
+What: /sys/bus/counter/devices/counterX/countY/compare
+KernelVersion: 6.15
+Contact: linux-iio@vger.kernel.org
+Description:
+ If the counter device supports compare registers -- registers
+ used to compare counter channels against a particular count --
+ the compare count for channel Y is provided by this attribute.
+
What: /sys/bus/counter/devices/counterX/countY/capture
KernelVersion: 6.1
Contact: linux-iio@vger.kernel.org
@@ -303,23 +311,24 @@ What: /sys/bus/counter/devices/counterX/cascade_counts_enable_component_id
What: /sys/bus/counter/devices/counterX/external_input_phase_clock_select_component_id
What: /sys/bus/counter/devices/counterX/countY/capture_component_id
What: /sys/bus/counter/devices/counterX/countY/ceiling_component_id
-What: /sys/bus/counter/devices/counterX/countY/floor_component_id
+What: /sys/bus/counter/devices/counterX/countY/compare_component_id
What: /sys/bus/counter/devices/counterX/countY/count_mode_component_id
What: /sys/bus/counter/devices/counterX/countY/direction_component_id
What: /sys/bus/counter/devices/counterX/countY/enable_component_id
What: /sys/bus/counter/devices/counterX/countY/error_noise_component_id
+What: /sys/bus/counter/devices/counterX/countY/floor_component_id
+What: /sys/bus/counter/devices/counterX/countY/num_overflows_component_id
What: /sys/bus/counter/devices/counterX/countY/prescaler_component_id
What: /sys/bus/counter/devices/counterX/countY/preset_component_id
What: /sys/bus/counter/devices/counterX/countY/preset_enable_component_id
What: /sys/bus/counter/devices/counterX/countY/signalZ_action_component_id
-What: /sys/bus/counter/devices/counterX/countY/num_overflows_component_id
What: /sys/bus/counter/devices/counterX/signalY/cable_fault_component_id
What: /sys/bus/counter/devices/counterX/signalY/cable_fault_enable_component_id
What: /sys/bus/counter/devices/counterX/signalY/filter_clock_prescaler_component_id
+What: /sys/bus/counter/devices/counterX/signalY/frequency_component_id
What: /sys/bus/counter/devices/counterX/signalY/index_polarity_component_id
What: /sys/bus/counter/devices/counterX/signalY/polarity_component_id
What: /sys/bus/counter/devices/counterX/signalY/synchronous_mode_component_id
-What: /sys/bus/counter/devices/counterX/signalY/frequency_component_id
KernelVersion: 5.16
Contact: linux-iio@vger.kernel.org
Description:
diff --git a/Documentation/ABI/testing/sysfs-bus-cxl b/Documentation/ABI/testing/sysfs-bus-cxl
index fff2581b8033..c80a1b5a03db 100644
--- a/Documentation/ABI/testing/sysfs-bus-cxl
+++ b/Documentation/ABI/testing/sysfs-bus-cxl
@@ -1,5 +1,5 @@
What: /sys/bus/cxl/flush
-Date: Januarry, 2022
+Date: January, 2022
KernelVersion: v5.18
Contact: linux-cxl@vger.kernel.org
Description:
@@ -18,6 +18,24 @@ Description:
specification.
+What: /sys/bus/cxl/devices/memX/payload_max
+Date: December, 2020
+KernelVersion: v5.12
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (RO) Maximum size (in bytes) of the mailbox command payload
+ registers. Linux caps this at 1MB if the device reports a
+ larger size.
+
+
+What: /sys/bus/cxl/devices/memX/label_storage_size
+Date: May, 2021
+KernelVersion: v5.13
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (RO) Size (in bytes) of the Label Storage Area (LSA).
+
+
What: /sys/bus/cxl/devices/memX/ram/size
Date: December, 2020
KernelVersion: v5.12
@@ -33,7 +51,7 @@ Date: May, 2023
KernelVersion: v6.8
Contact: linux-cxl@vger.kernel.org
Description:
- (RO) For CXL host platforms that support "QoS Telemmetry"
+ (RO) For CXL host platforms that support "QoS Telemetry"
this attribute conveys a comma delimited list of platform
specific cookies that identifies a QoS performance class
for the volatile partition of the CXL mem device. These
@@ -60,7 +78,7 @@ Date: May, 2023
KernelVersion: v6.8
Contact: linux-cxl@vger.kernel.org
Description:
- (RO) For CXL host platforms that support "QoS Telemmetry"
+ (RO) For CXL host platforms that support "QoS Telemetry"
this attribute conveys a comma delimited list of platform
specific cookies that identifies a QoS performance class
for the persistent partition of the CXL mem device. These
@@ -224,7 +242,7 @@ Description:
decoding a Host Physical Address range. Note that this number
may be elevated without any regionX objects active or even
enumerated, as this may be due to decoders established by
- platform firwmare or a previous kernel (kexec).
+ platform firmware or a previous kernel (kexec).
What: /sys/bus/cxl/devices/decoderX.Y
@@ -321,14 +339,13 @@ KernelVersion: v6.0
Contact: linux-cxl@vger.kernel.org
Description:
(RW) When a CXL decoder is of devtype "cxl_decoder_endpoint" it
- translates from a host physical address range, to a device local
- address range. Device-local address ranges are further split
- into a 'ram' (volatile memory) range and 'pmem' (persistent
- memory) range. The 'mode' attribute emits one of 'ram', 'pmem',
- 'mixed', or 'none'. The 'mixed' indication is for error cases
- when a decoder straddles the volatile/persistent partition
- boundary, and 'none' indicates the decoder is not actively
- decoding, or no DPA allocation policy has been set.
+ translates from a host physical address range, to a device
+ local address range. Device-local address ranges are further
+ split into a 'ram' (volatile memory) range and 'pmem'
+ (persistent memory) range. The 'mode' attribute emits one of
+ 'ram', 'pmem', or 'none'. The 'none' indicates the decoder is
+ not actively decoding, or no DPA allocation policy has been
+ set.
'mode' can be written, when the decoder is in the 'disabled'
state, with either 'ram' or 'pmem' to set the boundaries for the
@@ -423,7 +440,7 @@ Date: May, 2023
KernelVersion: v6.5
Contact: linux-cxl@vger.kernel.org
Description:
- (RO) For CXL host platforms that support "QoS Telemmetry" this
+ (RO) For CXL host platforms that support "QoS Telemetry" this
root-decoder-only attribute conveys a platform specific cookie
that identifies a QoS performance class for the CXL Window.
This class-id can be compared against a similar "qos_class"
@@ -479,8 +496,17 @@ Description:
changed, only freed by writing 0. The kernel makes no guarantees
that data is maintained over an address space freeing event, and
there is no guarantee that a free followed by an allocate
- results in the same address being allocated.
+ results in the same address being allocated. If extended linear
+ cache is present, the size indicates extended linear cache size
+ plus the CXL region size.
+What: /sys/bus/cxl/devices/regionZ/extended_linear_cache_size
+Date: October, 2025
+KernelVersion: v6.19
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (RO) The size of extended linear cache, if there is an extended
+ linear cache. Otherwise the attribute will not be visible.
What: /sys/bus/cxl/devices/regionZ/mode
Date: January, 2023
@@ -552,3 +578,49 @@ Description:
attribute is only visible for devices supporting the
capability. The retrieved errors are logged as kernel
events when cxl_poison event tracing is enabled.
+
+
+What: /sys/bus/cxl/devices/regionZ/accessY/read_bandwidth
+ /sys/bus/cxl/devices/regionZ/accessY/write_bandwidth
+Date: Jan, 2024
+KernelVersion: v6.9
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (RO) The aggregated read or write bandwidth of the region. The
+ number is the accumulated read or write bandwidth of all CXL memory
+ devices that contributes to the region in MB/s. It is
+ identical data that should appear in
+ /sys/devices/system/node/nodeX/accessY/initiators/read_bandwidth or
+ /sys/devices/system/node/nodeX/accessY/initiators/write_bandwidth.
+ See Documentation/ABI/stable/sysfs-devices-node. access0 provides
+ the number to the closest initiator and access1 provides the
+ number to the closest CPU.
+
+
+What: /sys/bus/cxl/devices/regionZ/accessY/read_latency
+ /sys/bus/cxl/devices/regionZ/accessY/write_latency
+Date: Jan, 2024
+KernelVersion: v6.9
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (RO) The read or write latency of the region. The number is
+ the worst read or write latency of all CXL memory devices that
+ contributes to the region in nanoseconds. It is identical data
+ that should appear in
+ /sys/devices/system/node/nodeX/accessY/initiators/read_latency or
+ /sys/devices/system/node/nodeX/accessY/initiators/write_latency.
+ See Documentation/ABI/stable/sysfs-devices-node. access0 provides
+ the number to the closest initiator and access1 provides the
+ number to the closest CPU.
+
+
+What: /sys/bus/cxl/devices/nvdimm-bridge0/ndbusX/nmemY/cxl/dirty_shutdown
+Date: Feb, 2025
+KernelVersion: v6.15
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (RO) The device dirty shutdown count value, which is the number
+ of times the device could have incurred in potential data loss.
+ The count is persistent across power loss and wraps back to 0
+ upon overflow. If this file is not present, the device does not
+ have the necessary support for dirty tracking.
diff --git a/Documentation/ABI/testing/sysfs-bus-dax b/Documentation/ABI/testing/sysfs-bus-dax
new file mode 100644
index 000000000000..b34266bfae49
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-dax
@@ -0,0 +1,153 @@
+What: /sys/bus/dax/devices/daxX.Y/align
+Date: October, 2020
+KernelVersion: v5.10
+Contact: nvdimm@lists.linux.dev
+Description:
+ (RW) Provides a way to specify an alignment for a dax device.
+ Values allowed are constrained by the physical address ranges
+ that back the dax device, and also by arch requirements.
+
+What: /sys/bus/dax/devices/daxX.Y/mapping
+Date: October, 2020
+KernelVersion: v5.10
+Contact: nvdimm@lists.linux.dev
+Description:
+ (WO) Provides a way to allocate a mapping range under a dax
+ device. Specified in the format <start>-<end>.
+
+What: /sys/bus/dax/devices/daxX.Y/mapping[0..N]/start
+What: /sys/bus/dax/devices/daxX.Y/mapping[0..N]/end
+What: /sys/bus/dax/devices/daxX.Y/mapping[0..N]/page_offset
+Date: October, 2020
+KernelVersion: v5.10
+Contact: nvdimm@lists.linux.dev
+Description:
+ (RO) A dax device may have multiple constituent discontiguous
+ address ranges. These are represented by the different
+ 'mappingX' subdirectories. The 'start' attribute indicates the
+ start physical address for the given range. The 'end' attribute
+ indicates the end physical address for the given range. The
+ 'page_offset' attribute indicates the offset of the current
+ range in the dax device.
+
+What: /sys/bus/dax/devices/daxX.Y/resource
+Date: June, 2019
+KernelVersion: v5.3
+Contact: nvdimm@lists.linux.dev
+Description:
+ (RO) The resource attribute indicates the starting physical
+ address of a dax device. In case of a device with multiple
+ constituent ranges, it indicates the starting address of the
+ first range.
+
+What: /sys/bus/dax/devices/daxX.Y/size
+Date: October, 2020
+KernelVersion: v5.10
+Contact: nvdimm@lists.linux.dev
+Description:
+ (RW) The size attribute indicates the total size of a dax
+ device. For creating subdivided dax devices, or for resizing
+ an existing device, the new size can be written to this as
+ part of the reconfiguration process.
+
+What: /sys/bus/dax/devices/daxX.Y/numa_node
+Date: November, 2019
+KernelVersion: v5.5
+Contact: nvdimm@lists.linux.dev
+Description:
+ (RO) If NUMA is enabled and the platform has affinitized the
+ backing device for this dax device, emit the CPU node
+ affinity for this device.
+
+What: /sys/bus/dax/devices/daxX.Y/target_node
+Date: February, 2019
+KernelVersion: v5.1
+Contact: nvdimm@lists.linux.dev
+Description:
+ (RO) The target-node attribute is the Linux numa-node that a
+ device-dax instance may create when it is online. Prior to
+ being online the device's 'numa_node' property reflects the
+ closest online cpu node which is the typical expectation of a
+ device 'numa_node'. Once it is online it becomes its own
+ distinct numa node.
+
+What: $(readlink -f /sys/bus/dax/devices/daxX.Y)/../dax_region/available_size
+Date: October, 2020
+KernelVersion: v5.10
+Contact: nvdimm@lists.linux.dev
+Description:
+ (RO) The available_size attribute tracks available dax region
+ capacity. This only applies to volatile hmem devices, not pmem
+ devices, since pmem devices are defined by nvdimm namespace
+ boundaries.
+
+What: $(readlink -f /sys/bus/dax/devices/daxX.Y)/../dax_region/size
+Date: July, 2017
+KernelVersion: v5.1
+Contact: nvdimm@lists.linux.dev
+Description:
+ (RO) The size attribute indicates the size of a given dax region
+ in bytes.
+
+What: $(readlink -f /sys/bus/dax/devices/daxX.Y)/../dax_region/align
+Date: October, 2020
+KernelVersion: v5.10
+Contact: nvdimm@lists.linux.dev
+Description:
+ (RO) The align attribute indicates alignment of the dax region.
+ Changes on align may not always be valid, when say certain
+ mappings were created with 2M and then we switch to 1G. This
+ validates all ranges against the new value being attempted, post
+ resizing.
+
+What: $(readlink -f /sys/bus/dax/devices/daxX.Y)/../dax_region/seed
+Date: October, 2020
+KernelVersion: v5.10
+Contact: nvdimm@lists.linux.dev
+Description:
+ (RO) The seed device is a concept for dynamic dax regions to be
+ able to split the region amongst multiple sub-instances. The
+ seed device, similar to libnvdimm seed devices, is a device
+ that starts with zero capacity allocated and unbound to a
+ driver.
+
+What: $(readlink -f /sys/bus/dax/devices/daxX.Y)/../dax_region/create
+Date: October, 2020
+KernelVersion: v5.10
+Contact: nvdimm@lists.linux.dev
+Description:
+ (RW) The create interface to the dax region provides a way to
+ create a new unconfigured dax device under the given region, which
+ can then be configured (with a size etc.) and then probed.
+
+What: $(readlink -f /sys/bus/dax/devices/daxX.Y)/../dax_region/delete
+Date: October, 2020
+KernelVersion: v5.10
+Contact: nvdimm@lists.linux.dev
+Description:
+ (WO) The delete interface for a dax region provides for deletion
+ of any 0-sized and idle dax devices.
+
+What: $(readlink -f /sys/bus/dax/devices/daxX.Y)/../dax_region/id
+Date: July, 2017
+KernelVersion: v5.1
+Contact: nvdimm@lists.linux.dev
+Description:
+ (RO) The id attribute indicates the region id of a dax region.
+
+What: /sys/bus/dax/devices/daxX.Y/memmap_on_memory
+Date: January, 2024
+KernelVersion: v6.8
+Contact: nvdimm@lists.linux.dev
+Description:
+ (RW) Control the memmap_on_memory setting if the dax device
+ were to be hotplugged as system memory. This determines whether
+ the 'altmap' for the hotplugged memory will be placed on the
+ device being hotplugged (memmap_on_memory=1) or if it will be
+ placed on regular memory (memmap_on_memory=0). This attribute
+ must be set before the device is handed over to the 'kmem'
+ driver (i.e. hotplugged into system-ram). Additionally, this
+ depends on CONFIG_MHP_MEMMAP_ON_MEMORY, and a globally enabled
+ memmap_on_memory parameter for memory_hotplug. This is
+ typically set on the kernel command line -
+ memory_hotplug.memmap_on_memory set to 'true' or 'force'."
diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices b/Documentation/ABI/testing/sysfs-bus-event_source-devices
new file mode 100644
index 000000000000..79b268319df1
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices
@@ -0,0 +1,24 @@
+What: /sys/bus/event_source/devices/<pmu>
+Date: 2014/02/24
+Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
+Description: Performance Monitoring Unit (<pmu>)
+
+ Each <pmu> directory, for a PMU device, is a name
+ optionally followed by an underscore and then either a
+ decimal or hexadecimal number. For example, cpu is a
+ PMU name without a suffix as is intel_bts,
+ uncore_imc_0 is a PMU name with a 0 numeric suffix,
+ ddr_pmu_87e1b0000000 is a PMU name with a hex
+ suffix. The hex suffix must be more than two
+ characters long to avoid ambiguity with PMUs like the
+ S390 cpum_cf.
+
+ Tools can treat PMUs with the same name that differ by
+ suffix as instances of the same PMU for the sake of,
+ for example, opening an event. For example, the PMUs
+ uncore_imc_free_running_0 and
+ uncore_imc_free_running_1 have an event data_read;
+ opening the data_read event on a PMU specified as
+ uncore_imc_free_running should be treated as opening
+ the data_read event on PMU uncore_imc_free_running_0
+ and PMU uncore_imc_free_running_1.
diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-events b/Documentation/ABI/testing/sysfs-bus-event_source-devices-events
index 77de58d03822..0fe1b9487202 100644
--- a/Documentation/ABI/testing/sysfs-bus-event_source-devices-events
+++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-events
@@ -37,6 +37,14 @@ Description: Per-pmu performance monitoring events specific to the running syste
performance monitoring event supported by the <pmu>. The name
of the file is the name of the event.
+ As performance monitoring event names are case insensitive
+ in the perf tool, the perf tool only looks for all lower
+ case or all upper case event names in sysfs to avoid
+ scanning the directory. It is therefore required the
+ name of the event here is either completely lower or upper
+ case, with no mixed-case characters. Numbers, '.', '_', and
+ '-' are also allowed.
+
File contents:
<term>[=<value>][,<term>[=<value>]]...
diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hisi_ptt b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hisi_ptt
new file mode 100644
index 000000000000..1119766564d7
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hisi_ptt
@@ -0,0 +1,113 @@
+What: /sys/bus/event_source/devices/hisi_ptt<sicl_id>_<core_id>/tune
+Date: October 2022
+KernelVersion: 6.1
+Contact: Yicong Yang <yangyicong@hisilicon.com>
+Description: This directory contains files for tuning the PCIe link
+ parameters(events). Each file is named after the event
+ of the PCIe link.
+
+ See Documentation/trace/hisi-ptt.rst for more information.
+
+What: /sys/bus/event_source/devices/hisi_ptt<sicl_id>_<core_id>/tune/qos_tx_cpl
+Date: October 2022
+KernelVersion: 6.1
+Contact: Yicong Yang <yangyicong@hisilicon.com>
+Description: (RW) Controls the weight of Tx completion TLPs, which influence
+ the proportion of outbound completion TLPs on the PCIe link.
+ The available tune data is [0, 1, 2]. Writing a negative value
+ will return an error, and out of range values will be converted
+ to 2. The value indicates a probable level of the event.
+
+What: /sys/bus/event_source/devices/hisi_ptt<sicl_id>_<core_id>/tune/qos_tx_np
+Date: October 2022
+KernelVersion: 6.1
+Contact: Yicong Yang <yangyicong@hisilicon.com>
+Description: (RW) Controls the weight of Tx non-posted TLPs, which influence
+ the proportion of outbound non-posted TLPs on the PCIe link.
+ The available tune data is [0, 1, 2]. Writing a negative value
+ will return an error, and out of range values will be converted
+ to 2. The value indicates a probable level of the event.
+
+What: /sys/bus/event_source/devices/hisi_ptt<sicl_id>_<core_id>/tune/qos_tx_p
+Date: October 2022
+KernelVersion: 6.1
+Contact: Yicong Yang <yangyicong@hisilicon.com>
+Description: (RW) Controls the weight of Tx posted TLPs, which influence the
+ proportion of outbound posted TLPs on the PCIe link.
+ The available tune data is [0, 1, 2]. Writing a negative value
+ will return an error, and out of range values will be converted
+ to 2. The value indicates a probable level of the event.
+
+What: /sys/bus/event_source/devices/hisi_ptt<sicl_id>_<core_id>/tune/rx_alloc_buf_level
+Date: October 2022
+KernelVersion: 6.1
+Contact: Yicong Yang <yangyicong@hisilicon.com>
+Description: (RW) Control the allocated buffer watermark for inbound packets.
+ The packets will be stored in the buffer first and then transmitted
+ either when the watermark reached or when timed out.
+ The available tune data is [0, 1, 2]. Writing a negative value
+ will return an error, and out of range values will be converted
+ to 2. The value indicates a probable level of the event.
+
+What: /sys/bus/event_source/devices/hisi_ptt<sicl_id>_<core_id>/tune/tx_alloc_buf_level
+Date: October 2022
+KernelVersion: 6.1
+Contact: Yicong Yang <yangyicong@hisilicon.com>
+Description: (RW) Control the allocated buffer watermark of outbound packets.
+ The packets will be stored in the buffer first and then transmitted
+ either when the watermark reached or when timed out.
+ The available tune data is [0, 1, 2]. Writing a negative value
+ will return an error, and out of range values will be converted
+ to 2. The value indicates a probable level of the event.
+
+What: /sys/devices/hisi_ptt<sicl_id>_<core_id>/root_port_filters
+Date: May 2023
+KernelVersion: 6.5
+Contact: Yicong Yang <yangyicong@hisilicon.com>
+Description: This directory contains the files providing the PCIe Root Port filters
+ information used for PTT trace. Each file is named after the supported
+ Root Port device name <domain>:<bus>:<device>.<function>.
+
+ See the description of the "filter" in Documentation/trace/hisi-ptt.rst
+ for more information.
+
+What: /sys/devices/hisi_ptt<sicl_id>_<core_id>/root_port_filters/multiselect
+Date: May 2023
+KernelVersion: 6.5
+Contact: Yicong Yang <yangyicong@hisilicon.com>
+Description: (Read) Indicates if this kind of filter can be selected at the same
+ time as others filters, or must be used on it's own. 1 indicates
+ the former case and 0 indicates the latter.
+
+What: /sys/devices/hisi_ptt<sicl_id>_<core_id>/root_port_filters/<bdf>
+Date: May 2023
+KernelVersion: 6.5
+Contact: Yicong Yang <yangyicong@hisilicon.com>
+Description: (Read) Indicates the filter value of this Root Port filter, which
+ can be used to control the TLP headers to trace by the PTT trace.
+
+What: /sys/devices/hisi_ptt<sicl_id>_<core_id>/requester_filters
+Date: May 2023
+KernelVersion: 6.5
+Contact: Yicong Yang <yangyicong@hisilicon.com>
+Description: This directory contains the files providing the PCIe Requester filters
+ information used for PTT trace. Each file is named after the supported
+ Endpoint device name <domain>:<bus>:<device>.<function>.
+
+ See the description of the "filter" in Documentation/trace/hisi-ptt.rst
+ for more information.
+
+What: /sys/devices/hisi_ptt<sicl_id>_<core_id>/requester_filters/multiselect
+Date: May 2023
+KernelVersion: 6.5
+Contact: Yicong Yang <yangyicong@hisilicon.com>
+Description: (Read) Indicates if this kind of filter can be selected at the same
+ time as others filters, or must be used on it's own. 1 indicates
+ the former case and 0 indicates the latter.
+
+What: /sys/devices/hisi_ptt<sicl_id>_<core_id>/requester_filters/<bdf>
+Date: May 2023
+KernelVersion: 6.5
+Contact: Yicong Yang <yangyicong@hisilicon.com>
+Description: (Read) Indicates the filter value of this Requester filter, which
+ can be used to control the TLP headers to trace by the PTT trace.
diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-vpa-dtl b/Documentation/ABI/testing/sysfs-bus-event_source-devices-vpa-dtl
new file mode 100644
index 000000000000..7b7c789a5cf5
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-vpa-dtl
@@ -0,0 +1,25 @@
+What: /sys/bus/event_source/devices/vpa_dtl/format
+Date: February 2025
+Contact: Linux on PowerPC Developer List <linuxppc-dev at lists.ozlabs.org>
+Description: Read-only. Attribute group to describe the magic bits
+ that go into perf_event_attr.config for a particular pmu.
+ (See ABI/testing/sysfs-bus-event_source-devices-format).
+
+ Each attribute under this group defines a bit range of the
+ perf_event_attr.config. Supported attribute are listed
+ below::
+
+ event = "config:0-7" - event ID
+
+ For example::
+
+ dtl_cede = "event=0x1"
+
+What: /sys/bus/event_source/devices/vpa_dtl/events
+Date: February 2025
+Contact: Linux on PowerPC Developer List <linuxppc-dev at lists.ozlabs.org>
+Description: (RO) Attribute group to describe performance monitoring events
+ for the Virtual Processor Dispatch Trace Log. Each attribute in
+ this group describes a single performance monitoring event
+ supported by vpa_dtl pmu. The name of the file is the name of
+ the event (See ABI/testing/sysfs-bus-event_source-devices-events).
diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-vpa-pmu b/Documentation/ABI/testing/sysfs-bus-event_source-devices-vpa-pmu
new file mode 100644
index 000000000000..a116aee9709a
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-vpa-pmu
@@ -0,0 +1,25 @@
+What: /sys/bus/event_source/devices/vpa_pmu/format
+Date: November 2024
+Contact: Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
+Description: Read-only. Attribute group to describe the magic bits
+ that go into perf_event_attr.config for a particular pmu.
+ (See ABI/testing/sysfs-bus-event_source-devices-format).
+
+ Each attribute under this group defines a bit range of the
+ perf_event_attr.config. Supported attribute are listed
+ below::
+
+ event = "config:0-31" - event ID
+
+ For example::
+
+ l1_to_l2_lat = "event=0x1"
+
+What: /sys/bus/event_source/devices/vpa_pmu/events
+Date: November 2024
+Contact: Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
+Description: Read-only. Attribute group to describe performance monitoring
+ events for the Virtual Processor Area events. Each attribute
+ in this group describes a single performance monitoring event
+ supported by vpa_pmu. The name of the file is the name of
+ the event (See ABI/testing/sysfs-bus-event_source-devices-events).
diff --git a/Documentation/ABI/testing/sysfs-bus-i2c-devices-m24lr b/Documentation/ABI/testing/sysfs-bus-i2c-devices-m24lr
new file mode 100644
index 000000000000..7c51ce8d38ba
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-i2c-devices-m24lr
@@ -0,0 +1,100 @@
+What: /sys/bus/i2c/devices/<busnum>-<primary-addr>/unlock
+Date: 2025-07-04
+KernelVersion: 6.17
+Contact: Abd-Alrhman Masalkhi <abd.masalkhi@gmail.com>
+Description:
+ Write-only attribute used to present a password and unlock
+ access to protected areas of the M24LR chip, including
+ configuration registers such as the Sector Security Status
+ (SSS) bytes. A valid password must be written to enable write
+ access to these regions via the I2C interface.
+
+ Format:
+ - Hexadecimal string representing a 32-bit (4-byte) password
+ - Accepts 1 to 8 hex digits (e.g., "c", "1F", "a1b2c3d4")
+ - No "0x" prefix, whitespace, or trailing newline
+ - Case-insensitive
+
+ Behavior:
+ - If the password matches the internal stored value,
+ access to protected memory/configuration is granted
+ - If the password does not match the internally stored value,
+ it will fail silently
+
+What: /sys/bus/i2c/devices/<busnum>-<primary-addr>/new_pass
+Date: 2025-07-04
+KernelVersion: 6.17
+Contact: Abd-Alrhman Masalkhi <abd.masalkhi@gmail.com>
+Description:
+ Write-only attribute used to update the password required to
+ unlock the M24LR chip.
+
+ Format:
+ - Hexadecimal string representing a new 32-bit password
+ - Accepts 1 to 8 hex digits (e.g., "1A", "ffff", "c0ffee00")
+ - No "0x" prefix, whitespace, or trailing newline
+ - Case-insensitive
+
+ Behavior:
+ - Overwrites the current password stored in the I2C password
+ register
+ - Requires the device to be unlocked before changing the
+ password
+ - If the device is locked, the write silently fails
+
+What: /sys/bus/i2c/devices/<busnum>-<primary-addr>/uid
+Date: 2025-07-04
+KernelVersion: 6.17
+Contact: Abd-Alrhman Masalkhi <abd.masalkhi@gmail.com>
+Description:
+ Read-only attribute that exposes the 8-byte unique identifier
+ programmed into the M24LR chip at the factory.
+
+ Format:
+ - Lowercase hexadecimal string representing a 64-bit value
+ - 1 to 16 hex digits (e.g., "e00204f12345678")
+ - No "0x" prefix
+ - Includes a trailing newline
+
+What: /sys/bus/i2c/devices/<busnum>-<primary-addr>/total_sectors
+Date: 2025-07-04
+KernelVersion: 6.17
+Contact: Abd-Alrhman Masalkhi <abd.masalkhi@gmail.com>
+Description:
+ Read-only attribute that exposes the total number of EEPROM
+ sectors available in the M24LR chip.
+
+ Format:
+ - 1 to 2 hex digits (e.g. "F")
+ - No "0x" prefix
+ - Includes a trailing newline
+
+ Notes:
+ - Value is encoded by the chip and corresponds to the EEPROM
+ size (e.g., 3 = 4 kbit for M24LR04E-R)
+
+What: /sys/bus/i2c/devices/<busnum>-<primary-addr>/sss
+Date: 2025-07-04
+KernelVersion: 6.17
+Contact: Abd-Alrhman Masalkhi <abd.masalkhi@gmail.com>
+Description:
+ Read/write binary attribute representing the Sector Security
+ Status (SSS) bytes for all EEPROM sectors in STMicroelectronics
+ M24LR chips.
+
+ Each EEPROM sector has one SSS byte, which controls I2C and
+ RF access through protection bits and optional password
+ authentication.
+
+ Format:
+ - The file contains one byte per EEPROM sector
+ - Byte at offset N corresponds to sector N
+ - Binary access only; use tools like dd, Python, or C that
+ support byte-level I/O and offset control.
+
+ Notes:
+ - The number of valid bytes in this file is equal to the
+ value exposed by 'total_sectors' file
+ - Write access requires prior password authentication in
+ I2C mode
+ - Refer to the M24LR datasheet for full SSS bit layout
diff --git a/Documentation/ABI/testing/sysfs-bus-i2c-devices-turris-omnia-mcu b/Documentation/ABI/testing/sysfs-bus-i2c-devices-turris-omnia-mcu
new file mode 100644
index 000000000000..35a8f6dae5bf
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-i2c-devices-turris-omnia-mcu
@@ -0,0 +1,113 @@
+What: /sys/bus/i2c/devices/<mcu_device>/board_revision
+Date: September 2024
+KernelVersion: 6.11
+Contact: Marek Behún <kabel@kernel.org>
+Description: (RO) Contains board revision number.
+
+ Only available if board information is burned in the MCU (older
+ revisions have board information burned in the ATSHA204-A chip).
+
+ Format: %u.
+
+What: /sys/bus/i2c/devices/<mcu_device>/first_mac_address
+Date: September 2024
+KernelVersion: 6.11
+Contact: Marek Behún <kabel@kernel.org>
+Description: (RO) Contains device first MAC address. Each Turris Omnia is
+ allocated 3 MAC addresses. The two additional addresses are
+ computed from the first one by incrementing it.
+
+ Only available if board information is burned in the MCU (older
+ revisions have board information burned in the ATSHA204-A chip).
+
+ Format: %pM.
+
+What: /sys/bus/i2c/devices/<mcu_device>/front_button_mode
+Date: September 2024
+KernelVersion: 6.11
+Contact: Marek Behún <kabel@kernel.org>
+Description: (RW) The front button on the Turris Omnia router can be
+ configured either to change the intensity of all the LEDs on the
+ front panel, or to send the press event to the CPU as an
+ interrupt.
+
+ This file switches between these two modes:
+ - ``mcu`` makes the button press event be handled by the MCU to
+ change the LEDs panel intensity.
+ - ``cpu`` makes the button press event be handled by the CPU.
+
+ Format: %s.
+
+What: /sys/bus/i2c/devices/<mcu_device>/front_button_poweron
+Date: September 2024
+KernelVersion: 6.11
+Contact: Marek Behún <kabel@kernel.org>
+Description: (RW) Newer versions of the microcontroller firmware of the
+ Turris Omnia router support powering off the router into true
+ low power mode. The router can be powered on by pressing the
+ front button.
+
+ This file configures whether front button power on is enabled.
+
+ This file is present only if the power off feature is supported
+ by the firmware.
+
+ Format: %i.
+
+What: /sys/bus/i2c/devices/<mcu_device>/fw_features
+Date: September 2024
+KernelVersion: 6.11
+Contact: Marek Behún <kabel@kernel.org>
+Description: (RO) Newer versions of the microcontroller firmware report the
+ features they support. These can be read from this file. If the
+ MCU firmware is too old, this file reads 0x0.
+
+ Format: 0x%x.
+
+What: /sys/bus/i2c/devices/<mcu_device>/fw_version_hash_application
+Date: September 2024
+KernelVersion: 6.11
+Contact: Marek Behún <kabel@kernel.org>
+Description: (RO) Contains the version hash (commit hash) of the application
+ part of the microcontroller firmware.
+
+ Format: %s.
+
+What: /sys/bus/i2c/devices/<mcu_device>/fw_version_hash_bootloader
+Date: September 2024
+KernelVersion: 6.11
+Contact: Marek Behún <kabel@kernel.org>
+Description: (RO) Contains the version hash (commit hash) of the bootloader
+ part of the microcontroller firmware.
+
+ Format: %s.
+
+What: /sys/bus/i2c/devices/<mcu_device>/mcu_type
+Date: September 2024
+KernelVersion: 6.11
+Contact: Marek Behún <kabel@kernel.org>
+Description: (RO) Contains the microcontroller type (STM32, GD32, MKL).
+
+ Format: %s.
+
+What: /sys/bus/i2c/devices/<mcu_device>/reset_selector
+Date: September 2024
+KernelVersion: 6.11
+Contact: Marek Behún <kabel@kernel.org>
+Description: (RO) Contains the selected factory reset level, determined by
+ how long the rear reset button was held by the user during board
+ reset.
+
+ Format: %i.
+
+What: /sys/bus/i2c/devices/<mcu_device>/serial_number
+Date: September 2024
+KernelVersion: 6.11
+Contact: Marek Behún <kabel@kernel.org>
+Description: (RO) Contains the 64-bit board serial number in hexadecimal
+ format.
+
+ Only available if board information is burned in the MCU (older
+ revisions have board information burned in the ATSHA204-A chip).
+
+ Format: %016X.
diff --git a/Documentation/ABI/testing/sysfs-bus-iio b/Documentation/ABI/testing/sysfs-bus-iio
index 2e6d5ebfd3c7..5f87dcee78f7 100644
--- a/Documentation/ABI/testing/sysfs-bus-iio
+++ b/Documentation/ABI/testing/sysfs-bus-iio
@@ -94,6 +94,7 @@ Description:
What: /sys/bus/iio/devices/iio:deviceX/sampling_frequency
What: /sys/bus/iio/devices/iio:deviceX/in_intensity_sampling_frequency
What: /sys/bus/iio/devices/iio:deviceX/buffer/sampling_frequency
+What: /sys/bus/iio/devices/iio:deviceX/events/sampling_frequency
What: /sys/bus/iio/devices/triggerX/sampling_frequency
KernelVersion: 2.6.35
Contact: linux-iio@vger.kernel.org
@@ -140,8 +141,6 @@ Description:
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_raw
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_supply_raw
-What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_i_raw
-What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_q_raw
KernelVersion: 2.6.35
Contact: linux-iio@vger.kernel.org
Description:
@@ -168,19 +167,18 @@ Description:
is required is a consistent labeling. Units after application
of scale and offset are millivolts.
-What: /sys/bus/iio/devices/iio:deviceX/in_currentY_raw
-What: /sys/bus/iio/devices/iio:deviceX/in_currentY_supply_raw
-KernelVersion: 3.17
+What: /sys/bus/iio/devices/iio:deviceX/in_altvoltageY_rms_raw
+KernelVersion: 6.18
Contact: linux-iio@vger.kernel.org
Description:
- Raw (unscaled no bias removal etc.) current measurement from
- channel Y. In special cases where the channel does not
- correspond to externally available input one of the named
- versions may be used. The number must always be specified and
- unique to allow association with event codes. Units after
- application of scale and offset are milliamps.
+ Raw (unscaled) Root Mean Square (RMS) voltage measurement from
+ channel Y. Units after application of scale and offset are
+ millivolts.
What: /sys/bus/iio/devices/iio:deviceX/in_powerY_raw
+What: /sys/bus/iio/devices/iio:deviceX/in_powerY_active_raw
+What: /sys/bus/iio/devices/iio:deviceX/in_powerY_reactive_raw
+What: /sys/bus/iio/devices/iio:deviceX/in_powerY_apparent_raw
KernelVersion: 4.5
Contact: linux-iio@vger.kernel.org
Description:
@@ -189,6 +187,13 @@ Description:
unique to allow association with event codes. Units after
application of scale and offset are milliwatts.
+What: /sys/bus/iio/devices/iio:deviceX/in_powerY_powerfactor
+KernelVersion: 6.18
+Contact: linux-iio@vger.kernel.org
+Description:
+ Power factor measurement from channel Y. Power factor is the
+ ratio of active power to apparent power. The value is unitless.
+
What: /sys/bus/iio/devices/iio:deviceX/in_capacitanceY_raw
KernelVersion: 3.2
Contact: linux-iio@vger.kernel.org
@@ -227,7 +232,7 @@ Description:
same scaling as _raw.
What: /sys/bus/iio/devices/iio:deviceX/in_temp_raw
-What: /sys/bus/iio/devices/iio:deviceX/in_tempX_raw
+What: /sys/bus/iio/devices/iio:deviceX/in_tempY_raw
What: /sys/bus/iio/devices/iio:deviceX/in_temp_x_raw
What: /sys/bus/iio/devices/iio:deviceX/in_temp_y_raw
What: /sys/bus/iio/devices/iio:deviceX/in_temp_ambient_raw
@@ -243,7 +248,8 @@ Description:
less measurements. Units after application of scale and offset
are milli degrees Celsius.
-What: /sys/bus/iio/devices/iio:deviceX/in_tempX_input
+What: /sys/bus/iio/devices/iio:deviceX/in_tempY_input
+What: /sys/bus/iio/devices/iio:deviceX/in_temp_input
KernelVersion: 2.6.38
Contact: linux-iio@vger.kernel.org
Description:
@@ -415,11 +421,11 @@ Contact: linux-iio@vger.kernel.org
Description:
Scaled humidity measurement in milli percent.
-What: /sys/bus/iio/devices/iio:deviceX/in_X_mean_raw
+What: /sys/bus/iio/devices/iio:deviceX/in_Y_mean_raw
KernelVersion: 3.5
Contact: linux-iio@vger.kernel.org
Description:
- Averaged raw measurement from channel X. The number of values
+ Averaged raw measurement from channel Y. The number of values
used for averaging is device specific. The converting rules for
normal raw values also applies to the averaged raw values.
@@ -427,18 +433,14 @@ What: /sys/bus/iio/devices/iio:deviceX/in_accel_offset
What: /sys/bus/iio/devices/iio:deviceX/in_accel_x_offset
What: /sys/bus/iio/devices/iio:deviceX/in_accel_y_offset
What: /sys/bus/iio/devices/iio:deviceX/in_accel_z_offset
+What: /sys/bus/iio/devices/iio:deviceX/in_altvoltage_q_offset
+What: /sys/bus/iio/devices/iio:deviceX/in_altvoltage_i_offset
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_offset
What: /sys/bus/iio/devices/iio:deviceX/in_voltage_offset
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_i_offset
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_q_offset
-What: /sys/bus/iio/devices/iio:deviceX/in_voltage_q_offset
-What: /sys/bus/iio/devices/iio:deviceX/in_voltage_i_offset
What: /sys/bus/iio/devices/iio:deviceX/in_currentY_offset
What: /sys/bus/iio/devices/iio:deviceX/in_current_offset
-What: /sys/bus/iio/devices/iio:deviceX/in_currentY_i_offset
-What: /sys/bus/iio/devices/iio:deviceX/in_currentY_q_offset
-What: /sys/bus/iio/devices/iio:deviceX/in_current_q_offset
-What: /sys/bus/iio/devices/iio:deviceX/in_current_i_offset
What: /sys/bus/iio/devices/iio:deviceX/in_tempY_offset
What: /sys/bus/iio/devices/iio:deviceX/in_temp_offset
What: /sys/bus/iio/devices/iio:deviceX/in_pressureY_offset
@@ -447,7 +449,7 @@ What: /sys/bus/iio/devices/iio:deviceX/in_humidityrelative_offset
What: /sys/bus/iio/devices/iio:deviceX/in_magn_offset
What: /sys/bus/iio/devices/iio:deviceX/in_rot_offset
What: /sys/bus/iio/devices/iio:deviceX/in_angl_offset
-What: /sys/bus/iio/devices/iio:deviceX/in_capacitanceX_offset
+What: /sys/bus/iio/devices/iio:deviceX/in_capacitanceY_offset
KernelVersion: 2.6.35
Contact: linux-iio@vger.kernel.org
Description:
@@ -466,21 +468,15 @@ Description:
to the _raw output.
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_scale
-What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_i_scale
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_q_scale
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_supply_scale
What: /sys/bus/iio/devices/iio:deviceX/in_voltage_scale
-What: /sys/bus/iio/devices/iio:deviceX/in_voltage_i_scale
-What: /sys/bus/iio/devices/iio:deviceX/in_voltage_q_scale
What: /sys/bus/iio/devices/iio:deviceX/in_voltage-voltage_scale
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_scale
What: /sys/bus/iio/devices/iio:deviceX/out_altvoltageY_scale
What: /sys/bus/iio/devices/iio:deviceX/in_currentY_scale
What: /sys/bus/iio/devices/iio:deviceX/in_currentY_supply_scale
What: /sys/bus/iio/devices/iio:deviceX/in_current_scale
-What: /sys/bus/iio/devices/iio:deviceX/in_currentY_i_scale
-What: /sys/bus/iio/devices/iio:deviceX/in_currentY_q_scale
-What: /sys/bus/iio/devices/iio:deviceX/in_current_i_scale
What: /sys/bus/iio/devices/iio:deviceX/in_current_q_scale
What: /sys/bus/iio/devices/iio:deviceX/in_accel_scale
What: /sys/bus/iio/devices/iio:deviceX/in_accel_peak_scale
@@ -507,6 +503,9 @@ What: /sys/bus/iio/devices/iio:deviceX/in_angl_scale
What: /sys/bus/iio/devices/iio:deviceX/in_intensity_x_scale
What: /sys/bus/iio/devices/iio:deviceX/in_intensity_y_scale
What: /sys/bus/iio/devices/iio:deviceX/in_intensity_z_scale
+What: /sys/bus/iio/devices/iio:deviceX/in_intensity_red_scale
+What: /sys/bus/iio/devices/iio:deviceX/in_intensity_green_scale
+What: /sys/bus/iio/devices/iio:deviceX/in_intensity_blue_scale
What: /sys/bus/iio/devices/iio:deviceX/in_concentration_co2_scale
KernelVersion: 2.6.35
Contact: linux-iio@vger.kernel.org
@@ -522,13 +521,27 @@ Description:
What: /sys/bus/iio/devices/iio:deviceX/in_accel_x_calibbias
What: /sys/bus/iio/devices/iio:deviceX/in_accel_y_calibbias
What: /sys/bus/iio/devices/iio:deviceX/in_accel_z_calibbias
+What: /sys/bus/iio/devices/iio:deviceX/in_altvoltageY_i_calibbias
+What: /sys/bus/iio/devices/iio:deviceX/in_altvoltageY_q_calibbias
What: /sys/bus/iio/devices/iio:deviceX/in_anglvel_x_calibbias
What: /sys/bus/iio/devices/iio:deviceX/in_anglvel_y_calibbias
What: /sys/bus/iio/devices/iio:deviceX/in_anglvel_z_calibbias
+What: /sys/bus/iio/devices/iio:deviceX/in_capacitance_calibbias
+What: /sys/bus/iio/devices/iio:deviceX/in_illuminance_calibbias
What: /sys/bus/iio/devices/iio:deviceX/in_illuminance0_calibbias
-What: /sys/bus/iio/devices/iio:deviceX/in_proximity0_calibbias
-What: /sys/bus/iio/devices/iio:deviceX/in_pressureY_calibbias
+What: /sys/bus/iio/devices/iio:deviceX/in_intensityY_calibbias
+What: /sys/bus/iio/devices/iio:deviceX/in_magn_x_calibbias
+What: /sys/bus/iio/devices/iio:deviceX/in_magn_y_calibbias
+What: /sys/bus/iio/devices/iio:deviceX/in_magn_z_calibbias
What: /sys/bus/iio/devices/iio:deviceX/in_pressure_calibbias
+What: /sys/bus/iio/devices/iio:deviceX/in_pressureY_calibbias
+What: /sys/bus/iio/devices/iio:deviceX/in_proximity_calibbias
+What: /sys/bus/iio/devices/iio:deviceX/in_proximity0_calibbias
+What: /sys/bus/iio/devices/iio:deviceX/in_resistance_calibbias
+What: /sys/bus/iio/devices/iio:deviceX/in_temp_calibbias
+What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_calibbias
+What: /sys/bus/iio/devices/iio:deviceX/out_currentY_calibbias
+What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_calibbias
KernelVersion: 2.6.35
Contact: linux-iio@vger.kernel.org
Description:
@@ -540,6 +553,10 @@ Description:
What: /sys/bus/iio/devices/iio:deviceX/in_accel_calibbias_available
What: /sys/bus/iio/devices/iio:deviceX/in_anglvel_calibbias_available
+What: /sys/bus/iio/devices/iio:deviceX/in_temp_calibbias_available
+What: /sys/bus/iio/devices/iio:deviceX/in_proximity_calibbias_available
+What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_calibbias_available
+What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_calibbias_available
KernelVersion: 5.8
Contact: linux-iio@vger.kernel.org
Description:
@@ -548,25 +565,54 @@ Description:
- a small discrete set of values like "0 2 4 6 8"
- a range specified as "[min step max]"
-What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_calibscale
-What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_supply_calibscale
-What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_i_calibscale
-What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_q_calibscale
-What: /sys/bus/iio/devices/iio:deviceX/in_voltage_i_calibscale
-What: /sys/bus/iio/devices/iio:deviceX/in_voltage_q_calibscale
-What: /sys/bus/iio/devices/iio:deviceX/in_altvoltage_calibscale
-What: /sys/bus/iio/devices/iio:deviceX/in_voltage_calibscale
+What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_convdelay
+KernelVersion: 6.17
+Contact: linux-iio@vger.kernel.org
+Description:
+ Delay of start of conversion from common reference point shared
+ by all channels. Can be writable when used to compensate for
+ delay variation introduced by external filters feeding a
+ simultaneous sampling ADC.
+
+ E.g., for the ad7606 ADC series, this value is intended as a
+ configurable time delay in seconds, to correct delay introduced
+ by an optional external filtering circuit.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_convdelay_available
+KernelVersion: 6.16
+Contact: linux-iio@vger.kernel.org
+Description:
+ Available values of convdelay. Maybe expressed as:
+
+ - a range specified as "[min step max]"
+
+ If shared across all channels, <type>_convdelay_available
+ is used.
+
What: /sys/bus/iio/devices/iio:deviceX/in_accel_x_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_accel_y_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_accel_z_calibscale
+What: /sys/bus/iio/devices/iio:deviceX/in_altvoltage_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_anglvel_x_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_anglvel_y_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_anglvel_z_calibscale
+What: /sys/bus/iio/devices/iio:deviceX/in_capacitance_calibscale
+What: /sys/bus/iio/devices/iio:deviceX/in_illuminance_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_illuminance0_calibscale
-What: /sys/bus/iio/devices/iio:deviceX/in_proximity0_calibscale
-What: /sys/bus/iio/devices/iio:deviceX/in_pressureY_calibscale
+What: /sys/bus/iio/devices/iio:deviceX/in_intensity_both_calibscale
+What: /sys/bus/iio/devices/iio:deviceX/in_intensity_calibscale
+What: /sys/bus/iio/devices/iio:deviceX/in_intensity_ir_calibscale
+What: /sys/bus/iio/devices/iio:deviceX/in_magn_x_calibscale
+What: /sys/bus/iio/devices/iio:deviceX/in_magn_y_calibscale
+What: /sys/bus/iio/devices/iio:deviceX/in_magn_z_calibscale
What: /sys/bus/iio/devices/iio:deviceX/in_pressure_calibscale
-What: /sys/bus/iio/devices/iio:deviceX/in_illuminance_calibscale
+What: /sys/bus/iio/devices/iio:deviceX/in_pressureY_calibscale
+What: /sys/bus/iio/devices/iio:deviceX/in_proximity0_calibscale
+What: /sys/bus/iio/devices/iio:deviceX/in_voltage_calibscale
+What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_calibscale
+What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_supply_calibscale
+What: /sys/bus/iio/devices/iio:deviceX/out_currentY_calibscale
+What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_calibscale
KernelVersion: 2.6.35
Contact: linux-iio@vger.kernel.org
Description:
@@ -574,6 +620,20 @@ Description:
production inaccuracies). If shared across all channels,
<type>_calibscale is used.
+What: /sys/bus/iio/devices/iio:deviceX/in_illuminanceY_calibscale_available
+What: /sys/bus/iio/devices/iio:deviceX/in_intensityY_calibscale_available
+What: /sys/bus/iio/devices/iio:deviceX/in_proximityY_calibscale_available
+What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_calibscale_available
+KernelVersion: 4.8
+Contact: linux-iio@vger.kernel.org
+Description:
+ Available values of calibscale. Maybe expressed as either of:
+
+ - a small discrete set of values like "1 8 16"
+ - a range specified as "[min step max]"
+
+ If shared across all channels, <type>_calibscale_available is used.
+
What: /sys/bus/iio/devices/iio:deviceX/in_activity_calibgender
What: /sys/bus/iio/devices/iio:deviceX/in_energy_calibgender
What: /sys/bus/iio/devices/iio:deviceX/in_distance_calibgender
@@ -618,10 +678,10 @@ What: /sys/.../iio:deviceX/in_magn_scale_available
What: /sys/.../iio:deviceX/in_illuminance_scale_available
What: /sys/.../iio:deviceX/in_intensity_scale_available
What: /sys/.../iio:deviceX/in_proximity_scale_available
-What: /sys/.../iio:deviceX/in_voltageX_scale_available
+What: /sys/.../iio:deviceX/in_voltageY_scale_available
What: /sys/.../iio:deviceX/in_voltage-voltage_scale_available
-What: /sys/.../iio:deviceX/out_voltageX_scale_available
-What: /sys/.../iio:deviceX/out_altvoltageX_scale_available
+What: /sys/.../iio:deviceX/out_voltageY_scale_available
+What: /sys/.../iio:deviceX/out_altvoltageY_scale_available
What: /sys/.../iio:deviceX/in_capacitance_scale_available
What: /sys/.../iio:deviceX/in_pressure_scale_available
What: /sys/.../iio:deviceX/in_pressureY_scale_available
@@ -639,6 +699,7 @@ What: /sys/bus/iio/devices/iio:deviceX/in_intensity_red_hardwaregain
What: /sys/bus/iio/devices/iio:deviceX/in_intensity_green_hardwaregain
What: /sys/bus/iio/devices/iio:deviceX/in_intensity_blue_hardwaregain
What: /sys/bus/iio/devices/iio:deviceX/in_intensity_clear_hardwaregain
+What: /sys/bus/iio/devices/iio:deviceX/in_illuminance_hardwaregain
KernelVersion: 2.6.35
Contact: linux-iio@vger.kernel.org
Description:
@@ -706,7 +767,10 @@ Description:
1kohm_to_gnd: connected to ground via an 1kOhm resistor,
2.5kohm_to_gnd: connected to ground via a 2.5kOhm resistor,
6kohm_to_gnd: connected to ground via a 6kOhm resistor,
+ 7.7kohm_to_gnd: connected to ground via a 7.7kOhm resistor,
20kohm_to_gnd: connected to ground via a 20kOhm resistor,
+ 32kohm_to_gnd: connected to ground via a 32kOhm resistor,
+ 42kohm_to_gnd: connected to ground via a 42kOhm resistor,
90kohm_to_gnd: connected to ground via a 90kOhm resistor,
100kohm_to_gnd: connected to ground via an 100kOhm resistor,
125kohm_to_gnd: connected to ground via an 125kOhm resistor,
@@ -767,7 +831,11 @@ Description:
all the other channels, since it involves changing the VCO
fundamental output frequency.
+What: /sys/bus/iio/devices/iio:deviceX/in_altvoltageY_i_phase
+What: /sys/bus/iio/devices/iio:deviceX/in_altvoltageY_q_phase
What: /sys/bus/iio/devices/iio:deviceX/out_altvoltageY_phase
+What: /sys/bus/iio/devices/iio:deviceX/out_altvoltageY_i_phase
+What: /sys/bus/iio/devices/iio:deviceX/out_altvoltageY_q_phase
KernelVersion: 3.4.0
Contact: linux-iio@vger.kernel.org
Description:
@@ -830,6 +898,7 @@ What: /sys/.../iio:deviceX/events/in_tempY_thresh_rising_en
What: /sys/.../iio:deviceX/events/in_tempY_thresh_falling_en
What: /sys/.../iio:deviceX/events/in_capacitanceY_thresh_rising_en
What: /sys/.../iio:deviceX/events/in_capacitanceY_thresh_falling_en
+What: /sys/.../iio:deviceX/events/in_pressure_thresh_rising_en
KernelVersion: 2.6.37
Contact: linux-iio@vger.kernel.org
Description:
@@ -858,6 +927,7 @@ What: /sys/.../iio:deviceX/events/in_accel_y_roc_rising_en
What: /sys/.../iio:deviceX/events/in_accel_y_roc_falling_en
What: /sys/.../iio:deviceX/events/in_accel_z_roc_rising_en
What: /sys/.../iio:deviceX/events/in_accel_z_roc_falling_en
+What: /sys/.../iio:deviceX/events/in_accel_x&y&z_roc_rising_en
What: /sys/.../iio:deviceX/events/in_anglvel_x_roc_rising_en
What: /sys/.../iio:deviceX/events/in_anglvel_x_roc_falling_en
What: /sys/.../iio:deviceX/events/in_anglvel_y_roc_rising_en
@@ -933,6 +1003,7 @@ Description:
to the raw signal, allowing slow tracking to resume and the
adaptive threshold event detection to function as expected.
+What: /sys/.../events/in_accel_mag_adaptive_rising_value
What: /sys/.../events/in_accel_thresh_rising_value
What: /sys/.../events/in_accel_thresh_falling_value
What: /sys/.../events/in_accel_x_raw_thresh_rising_value
@@ -977,6 +1048,7 @@ What: /sys/.../events/in_capacitanceY_thresh_rising_value
What: /sys/.../events/in_capacitanceY_thresh_falling_value
What: /sys/.../events/in_capacitanceY_thresh_adaptive_rising_value
What: /sys/.../events/in_capacitanceY_thresh_falling_rising_value
+What: /sys/.../events/in_pressure_thresh_rising_value
KernelVersion: 2.6.37
Contact: linux-iio@vger.kernel.org
Description:
@@ -1079,6 +1151,7 @@ Description:
will get activated once in_voltage0_raw goes above 1200 and will become
deactivated again once the value falls below 1150.
+What: /sys/.../events/in_accel_roc_rising_value
What: /sys/.../events/in_accel_x_raw_roc_rising_value
What: /sys/.../events/in_accel_x_raw_roc_falling_value
What: /sys/.../events/in_accel_y_raw_roc_rising_value
@@ -1125,6 +1198,8 @@ Description:
value is in raw device units or in processed units (as _raw
and _input do on sysfs direct channel read attributes).
+What: /sys/.../events/in_accel_mag_adaptive_rising_period
+What: /sys/.../events/in_accel_roc_rising_period
What: /sys/.../events/in_accel_x_thresh_rising_period
What: /sys/.../events/in_accel_x_thresh_falling_period
What: /sys/.../events/in_accel_x_roc_rising_period
@@ -1294,6 +1369,15 @@ Description:
number or direction is not specified, applies to all channels of
this type.
+What: /sys/.../iio:deviceX/events/in_accel_x_mag_adaptive_rising_en
+What: /sys/.../iio:deviceX/events/in_accel_y_mag_adaptive_rising_en
+What: /sys/.../iio:deviceX/events/in_accel_z_mag_adaptive_rising_en
+KernelVersion: 2.6.37
+Contact: linux-iio@vger.kernel.org
+Description:
+ Similar to in_accel_x_thresh[_rising|_falling]_en, but here the
+ magnitude of the channel is compared to the adaptive threshold.
+
What: /sys/.../iio:deviceX/events/in_accel_mag_referenced_en
What: /sys/.../iio:deviceX/events/in_accel_mag_referenced_rising_en
What: /sys/.../iio:deviceX/events/in_accel_mag_referenced_falling_en
@@ -1396,10 +1480,6 @@ What: /sys/.../iio:deviceX/bufferY/in_timestamp_en
What: /sys/.../iio:deviceX/bufferY/in_voltageY_supply_en
What: /sys/.../iio:deviceX/bufferY/in_voltageY_en
What: /sys/.../iio:deviceX/bufferY/in_voltageY-voltageZ_en
-What: /sys/.../iio:deviceX/bufferY/in_voltageY_i_en
-What: /sys/.../iio:deviceX/bufferY/in_voltageY_q_en
-What: /sys/.../iio:deviceX/bufferY/in_voltage_i_en
-What: /sys/.../iio:deviceX/bufferY/in_voltage_q_en
What: /sys/.../iio:deviceX/bufferY/in_incli_x_en
What: /sys/.../iio:deviceX/bufferY/in_incli_y_en
What: /sys/.../iio:deviceX/bufferY/in_pressureY_en
@@ -1420,10 +1500,6 @@ What: /sys/.../iio:deviceX/bufferY/in_incli_type
What: /sys/.../iio:deviceX/bufferY/in_voltageY_type
What: /sys/.../iio:deviceX/bufferY/in_voltage_type
What: /sys/.../iio:deviceX/bufferY/in_voltageY_supply_type
-What: /sys/.../iio:deviceX/bufferY/in_voltageY_i_type
-What: /sys/.../iio:deviceX/bufferY/in_voltageY_q_type
-What: /sys/.../iio:deviceX/bufferY/in_voltage_i_type
-What: /sys/.../iio:deviceX/bufferY/in_voltage_q_type
What: /sys/.../iio:deviceX/bufferY/in_timestamp_type
What: /sys/.../iio:deviceX/bufferY/in_pressureY_type
What: /sys/.../iio:deviceX/bufferY/in_pressure_type
@@ -1461,10 +1537,6 @@ Description:
What: /sys/.../iio:deviceX/bufferY/in_voltageY_index
What: /sys/.../iio:deviceX/bufferY/in_voltageY_supply_index
-What: /sys/.../iio:deviceX/bufferY/in_voltageY_i_index
-What: /sys/.../iio:deviceX/bufferY/in_voltageY_q_index
-What: /sys/.../iio:deviceX/bufferY/in_voltage_i_index
-What: /sys/.../iio:deviceX/bufferY/in_voltage_q_index
What: /sys/.../iio:deviceX/bufferY/in_accel_x_index
What: /sys/.../iio:deviceX/bufferY/in_accel_y_index
What: /sys/.../iio:deviceX/bufferY/in_accel_z_index
@@ -1519,7 +1591,7 @@ Description:
This attribute is used to read the amount of quadrature error
present in the device at a given time.
-What: /sys/.../iio:deviceX/in_accelX_power_mode
+What: /sys/.../iio:deviceX/in_accelY_power_mode
KernelVersion: 3.11
Contact: linux-iio@vger.kernel.org
Description:
@@ -1531,6 +1603,9 @@ Description:
What: /sys/.../iio:deviceX/in_energy_input
What: /sys/.../iio:deviceX/in_energy_raw
+What: /sys/.../iio:deviceX/in_energyY_active_raw
+What: /sys/.../iio:deviceX/in_energyY_reactive_raw
+What: /sys/.../iio:deviceX/in_energyY_apparent_raw
KernelVersion: 4.0
Contact: linux-iio@vger.kernel.org
Description:
@@ -1590,6 +1665,10 @@ What: /sys/.../iio:deviceX/in_intensityY_uv_raw
What: /sys/.../iio:deviceX/in_intensityY_uva_raw
What: /sys/.../iio:deviceX/in_intensityY_uvb_raw
What: /sys/.../iio:deviceX/in_intensityY_duv_raw
+What: /sys/.../iio:deviceX/in_intensity_red_raw
+What: /sys/.../iio:deviceX/in_intensity_green_raw
+What: /sys/.../iio:deviceX/in_intensity_blue_raw
+What: /sys/.../iio:deviceX/in_intensity_clear_raw
KernelVersion: 3.4
Contact: linux-iio@vger.kernel.org
Description:
@@ -1648,22 +1727,31 @@ Description:
Raw value of rotation from true/magnetic north measured with
or without compensation from tilt sensors.
-What: /sys/bus/iio/devices/iio:deviceX/in_currentX_raw
-What: /sys/bus/iio/devices/iio:deviceX/in_currentX_i_raw
-What: /sys/bus/iio/devices/iio:deviceX/in_currentX_q_raw
-KernelVersion: 3.18
+What: /sys/bus/iio/devices/iio:deviceX/in_currentY_raw
+What: /sys/bus/iio/devices/iio:deviceX/in_currentY_supply_raw
+KernelVersion: 3.17
Contact: linux-iio@vger.kernel.org
Description:
- Raw current measurement from channel X. Units are in milliamps
+ Raw current measurement from channel Y. Units are in milliamps
after application of scale and offset. If no offset or scale is
present, output should be considered as processed with the
- unit in milliamps.
+ unit in milliamps. In special cases where the channel does not
+ correspond to externally available input one of the named
+ versions may be used.
Channels with 'i' and 'q' modifiers always exist in pairs and both
channels refer to the same signal. The 'i' channel contains the in-phase
component of the signal while the 'q' channel contains the quadrature
component.
+What: /sys/bus/iio/devices/iio:deviceX/in_altcurrentY_rms_raw
+KernelVersion: 6.18
+Contact: linux-iio@vger.kernel.org
+Description:
+ Raw (unscaled no bias removal etc.) Root Mean Square (RMS) current
+ measurement from channel Y. Units after application of scale and
+ offset are milliamps.
+
What: /sys/.../iio:deviceX/in_energy_en
What: /sys/.../iio:deviceX/in_distance_en
What: /sys/.../iio:deviceX/in_velocity_sqrt(x^2+y^2+z^2)_en
@@ -1821,9 +1909,9 @@ Description:
hardware fifo watermark level.
What: /sys/bus/iio/devices/iio:deviceX/in_temp_calibemissivity
-What: /sys/bus/iio/devices/iio:deviceX/in_tempX_calibemissivity
+What: /sys/bus/iio/devices/iio:deviceX/in_tempY_calibemissivity
What: /sys/bus/iio/devices/iio:deviceX/in_temp_object_calibemissivity
-What: /sys/bus/iio/devices/iio:deviceX/in_tempX_object_calibemissivity
+What: /sys/bus/iio/devices/iio:deviceX/in_tempY_object_calibemissivity
KernelVersion: 4.1
Contact: linux-iio@vger.kernel.org
Description:
@@ -1844,17 +1932,17 @@ Description:
is considered as one sample for <type>[_name]_sampling_frequency.
What: /sys/bus/iio/devices/iio:deviceX/in_concentration_raw
-What: /sys/bus/iio/devices/iio:deviceX/in_concentrationX_raw
+What: /sys/bus/iio/devices/iio:deviceX/in_concentrationY_raw
What: /sys/bus/iio/devices/iio:deviceX/in_concentration_co2_raw
-What: /sys/bus/iio/devices/iio:deviceX/in_concentrationX_co2_raw
+What: /sys/bus/iio/devices/iio:deviceX/in_concentrationY_co2_raw
What: /sys/bus/iio/devices/iio:deviceX/in_concentration_ethanol_raw
-What: /sys/bus/iio/devices/iio:deviceX/in_concentrationX_ethanol_raw
+What: /sys/bus/iio/devices/iio:deviceX/in_concentrationY_ethanol_raw
What: /sys/bus/iio/devices/iio:deviceX/in_concentration_h2_raw
-What: /sys/bus/iio/devices/iio:deviceX/in_concentrationX_h2_raw
+What: /sys/bus/iio/devices/iio:deviceX/in_concentrationY_h2_raw
What: /sys/bus/iio/devices/iio:deviceX/in_concentration_o2_raw
-What: /sys/bus/iio/devices/iio:deviceX/in_concentrationX_o2_raw
+What: /sys/bus/iio/devices/iio:deviceX/in_concentrationY_o2_raw
What: /sys/bus/iio/devices/iio:deviceX/in_concentration_voc_raw
-What: /sys/bus/iio/devices/iio:deviceX/in_concentrationX_voc_raw
+What: /sys/bus/iio/devices/iio:deviceX/in_concentrationY_voc_raw
KernelVersion: 4.3
Contact: linux-iio@vger.kernel.org
Description:
@@ -1862,9 +1950,9 @@ Description:
after application of scale and offset are percents.
What: /sys/bus/iio/devices/iio:deviceX/in_resistance_raw
-What: /sys/bus/iio/devices/iio:deviceX/in_resistanceX_raw
+What: /sys/bus/iio/devices/iio:deviceX/in_resistanceY_raw
What: /sys/bus/iio/devices/iio:deviceX/out_resistance_raw
-What: /sys/bus/iio/devices/iio:deviceX/out_resistanceX_raw
+What: /sys/bus/iio/devices/iio:deviceX/out_resistanceY_raw
KernelVersion: 4.3
Contact: linux-iio@vger.kernel.org
Description:
@@ -2053,7 +2141,7 @@ Description:
One of the following thermocouple types: B, E, J, K, N, R, S, T.
What: /sys/bus/iio/devices/iio:deviceX/in_temp_object_calibambient
-What: /sys/bus/iio/devices/iio:deviceX/in_tempX_object_calibambient
+What: /sys/bus/iio/devices/iio:deviceX/in_tempY_object_calibambient
KernelVersion: 5.10
Contact: linux-iio@vger.kernel.org
Description:
@@ -2129,9 +2217,9 @@ Description:
- a range specified as "[min step max]"
-What: /sys/bus/iio/devices/iio:deviceX/in_voltageX_sampling_frequency
+What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_sampling_frequency
What: /sys/bus/iio/devices/iio:deviceX/in_powerY_sampling_frequency
-What: /sys/bus/iio/devices/iio:deviceX/in_currentZ_sampling_frequency
+What: /sys/bus/iio/devices/iio:deviceX/in_currentY_sampling_frequency
KernelVersion: 5.20
Contact: linux-iio@vger.kernel.org
Description:
@@ -2225,6 +2313,52 @@ Description:
An example format is 16-bytes, 2-digits-per-byte, HEX-string
representing the sensor unique ID number.
+What: /sys/bus/iio/devices/iio:deviceX/filter_type_available
+What: /sys/bus/iio/devices/iio:deviceX/in_voltage-voltage_filter_type_available
+KernelVersion: 6.1
+Contact: linux-iio@vger.kernel.org
+Description:
+ Reading returns a list with the possible filter modes. Options
+ for the attribute:
+
+ * "none" - Filter is disabled/bypassed.
+ * "sinc1" - The digital sinc1 filter. Fast 1st
+ conversion time. Poor noise performance.
+ * "sinc3" - The digital sinc3 filter. Moderate 1st
+ conversion time. Good noise performance.
+ * "sinc3+pf1" - Sinc3 + device specific Post Filter 1.
+ * "sinc3+pf2" - Sinc3 + device specific Post Filter 2.
+ * "sinc3+pf3" - Sinc3 + device specific Post Filter 3.
+ * "sinc3+pf4" - Sinc3 + device specific Post Filter 4.
+ * "sinc3+rej60" - Sinc3 + 60Hz rejection.
+ * "sinc3+sinc1" - Sinc3 + averaging by 8. Low 1st conversion
+ time.
+ * "sinc4" - Sinc 4. Excellent noise performance. Long
+ 1st conversion time.
+ * "sinc4+lp" - Sinc4 + Low Pass Filter.
+ * "sinc4+sinc1" - Sinc4 + averaging by 8. Low 1st conversion
+ time.
+ * "sinc4+rej60" - Sinc4 + 60Hz rejection.
+ * "sinc5" - The digital sinc5 filter. Excellent noise
+ performance
+ * "sinc5+avg" - Sinc5 + averaging by 4.
+ * "sinc5+pf1" - Sinc5 + device specific Post Filter 1.
+ * "sinc5+sinc1" - Sinc5 + Sinc1.
+ * "sinc5+sinc1+pf1" - Sinc5 + Sinc1 + device specific Post Filter 1.
+ * "sinc5+sinc1+pf2" - Sinc5 + Sinc1 + device specific Post Filter 2.
+ * "sinc5+sinc1+pf3" - Sinc5 + Sinc1 + device specific Post Filter 3.
+ * "sinc5+sinc1+pf4" - Sinc5 + Sinc1 + device specific Post Filter 4.
+ * "wideband" - filter with wideband low ripple passband
+ and sharp transition band.
+
+What: /sys/bus/iio/devices/iio:deviceX/filter_type
+What: /sys/bus/iio/devices/iio:deviceX/in_voltageY-voltageZ_filter_type
+KernelVersion: 6.1
+Contact: linux-iio@vger.kernel.org
+Description:
+ Specifies which filter type apply to the channel. The possible
+ values are given by the filter_type_available attribute.
+
What: /sys/.../events/in_proximity_thresh_either_runningperiod
KernelVersion: 6.6
Contact: linux-iio@vger.kernel.org
@@ -2288,3 +2422,39 @@ KernelVersion: 6.7
Contact: linux-iio@vger.kernel.org
Description:
List of available timeout value for tap gesture confirmation.
+
+What: /sys/.../iio:deviceX/in_shunt_resistor
+What: /sys/.../iio:deviceX/in_current_shunt_resistor
+What: /sys/.../iio:deviceX/in_power_shunt_resistor
+KernelVersion: 6.10
+Contact: linux-iio@vger.kernel.org
+Description:
+ The value of current sense resistor in Ohms.
+
+What: /sys/.../iio:deviceX/in_attention_input
+KernelVersion: 6.13
+Contact: linux-iio@vger.kernel.org
+Description:
+ Value representing the user's attention to the system expressed
+ in units as percentage. This usually means if the user is
+ looking at the screen or not.
+
+What: /sys/.../events/in_accel_value_available
+KernelVersion: 6.18
+Contact: linux-iio@vger.kernel.org
+Description:
+ List of available threshold values for acceleration event
+ generation. Applies to all event types on in_accel channels.
+ Units after application of scale and offset are m/s^2.
+ Expressed as:
+
+ - a range specified as "[min step max]"
+
+What: /sys/.../events/in_accel_period_available
+KernelVersion: 6.18
+Contact: linux-iio@vger.kernel.org
+Description:
+ List of available periods for accelerometer event detection in
+ seconds, expressed as:
+
+ - a range specified as "[min step max]"
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-ad9739a b/Documentation/ABI/testing/sysfs-bus-iio-ad9739a
new file mode 100644
index 000000000000..ed59299e6f8d
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-iio-ad9739a
@@ -0,0 +1,19 @@
+What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_operating_mode
+KernelVersion: 6.9
+Contact: linux-iio@vger.kernel.org
+Description:
+ DAC operating mode. One of the following modes can be selected:
+
+ * normal: This is DAC normal mode.
+ * mixed-mode: In this mode the output is effectively chopped at
+ the DAC sample rate. This has the effect of
+ reducing the power of the fundamental signal while
+ increasing the power of the images centered around
+ the DAC sample rate, thus improving the output
+ power of these images.
+
+What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_operating_mode_available
+KernelVersion: 6.9
+Contact: linux-iio@vger.kernel.org
+Description:
+ Available operating modes.
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-adc-ad-sigma-delta b/Documentation/ABI/testing/sysfs-bus-iio-adc-ad-sigma-delta
new file mode 100644
index 000000000000..a5a8a579f4f3
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-iio-adc-ad-sigma-delta
@@ -0,0 +1,23 @@
+What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_sys_calibration
+KernelVersion: 5.5
+Contact: linux-iio@vger.kernel.org
+Description:
+ This attribute, if available, initiates the system calibration procedure. This is done on a
+ single channel at a time. Write '1' to start the calibration.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_sys_calibration_mode_available
+KernelVersion: 5.5
+Contact: linux-iio@vger.kernel.org
+Description:
+ This attribute, if available, returns a list with the possible calibration modes.
+ There are two available options:
+ "zero_scale" - calibrate to zero scale
+ "full_scale" - calibrate to full scale
+
+What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_sys_calibration_mode
+KernelVersion: 5.5
+Contact: linux-iio@vger.kernel.org
+Description:
+ This attribute, if available, sets up the calibration mode used in the system calibration
+ procedure. Reading returns the current calibration mode.
+ Writing sets the system calibration mode.
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-adc-ad4130 b/Documentation/ABI/testing/sysfs-bus-iio-adc-ad4130
index f24ed6687e90..d3fad27421d6 100644
--- a/Documentation/ABI/testing/sysfs-bus-iio-adc-ad4130
+++ b/Documentation/ABI/testing/sysfs-bus-iio-adc-ad4130
@@ -4,43 +4,17 @@ Contact: linux-iio@vger.kernel.org
Description:
Reading returns a list with the possible filter modes.
- * "sinc4" - Sinc 4. Excellent noise performance. Long
- 1st conversion time. No natural 50/60Hz rejection.
-
- * "sinc4+sinc1" - Sinc4 + averaging by 8. Low 1st conversion
- time.
-
- * "sinc3" - Sinc3. Moderate 1st conversion time.
- Good noise performance.
-
- * "sinc3+rej60" - Sinc3 + 60Hz rejection. At a sampling
- frequency of 50Hz, achieves simultaneous 50Hz and 60Hz
- rejection.
-
- * "sinc3+sinc1" - Sinc3 + averaging by 8. Low 1st conversion
- time. Best used with a sampling frequency of at least
- 216.19Hz.
-
- * "sinc3+pf1" - Sinc3 + Post Filter 1. 53dB rejection @
- 50Hz, 58dB rejection @ 60Hz.
-
- * "sinc3+pf2" - Sinc3 + Post Filter 2. 70dB rejection @
- 50Hz, 70dB rejection @ 60Hz.
-
- * "sinc3+pf3" - Sinc3 + Post Filter 3. 99dB rejection @
- 50Hz, 103dB rejection @ 60Hz.
-
- * "sinc3+pf4" - Sinc3 + Post Filter 4. 103dB rejection @
- 50Hz, 109dB rejection @ 60Hz.
+ This ABI is only kept for backwards compatibility and the values
+ returned are identical to filter_type_available attribute
+ documented in Documentation/ABI/testing/sysfs-bus-iio. Please,
+ use filter_type_available like ABI to provide filter options for
+ new drivers.
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY-voltageZ_filter_mode
KernelVersion: 6.2
Contact: linux-iio@vger.kernel.org
Description:
- Set the filter mode of the differential channel. When the filter
- mode changes, the in_voltageY-voltageZ_sampling_frequency and
- in_voltageY-voltageZ_sampling_frequency_available attributes
- might also change to accommodate the new filter mode.
- If the current sampling frequency is out of range for the new
- filter mode, the sampling frequency will be changed to the
- closest valid one.
+ This ABI is only kept for backwards compatibility and the values
+ returned are identical to in_voltageY-voltageZ_filter_type
+ attribute documented in Documentation/ABI/testing/sysfs-bus-iio.
+ Please, use in_voltageY-voltageZ_filter_type for new drivers.
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-adc-ad7192 b/Documentation/ABI/testing/sysfs-bus-iio-adc-ad7192
index f8315202c8f0..28be1cabf112 100644
--- a/Documentation/ABI/testing/sysfs-bus-iio-adc-ad7192
+++ b/Documentation/ABI/testing/sysfs-bus-iio-adc-ad7192
@@ -19,33 +19,9 @@ Description:
the bridge can be disconnected (when it is not being used
using the bridge_switch_en attribute.
-What: /sys/bus/iio/devices/iio:deviceX/in_voltagex_sys_calibration
-KernelVersion:
-Contact: linux-iio@vger.kernel.org
-Description:
- Initiates the system calibration procedure. This is done on a
- single channel at a time. Write '1' to start the calibration.
-
What: /sys/bus/iio/devices/iio:deviceX/in_voltage2-voltage2_shorted_raw
KernelVersion:
Contact: linux-iio@vger.kernel.org
Description:
Measure voltage from AIN2 pin connected to AIN(+)
and AIN(-) shorted.
-
-What: /sys/bus/iio/devices/iio:deviceX/in_voltagex_sys_calibration_mode_available
-KernelVersion:
-Contact: linux-iio@vger.kernel.org
-Description:
- Reading returns a list with the possible calibration modes.
- There are two available options:
- "zero_scale" - calibrate to zero scale
- "full_scale" - calibrate to full scale
-
-What: /sys/bus/iio/devices/iio:deviceX/in_voltagex_sys_calibration_mode
-KernelVersion:
-Contact: linux-iio@vger.kernel.org
-Description:
- Sets up the calibration mode used in the system calibration
- procedure. Reading returns the current calibration mode.
- Writing sets the system calibration mode.
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-adc-max9611 b/Documentation/ABI/testing/sysfs-bus-iio-adc-max9611
deleted file mode 100644
index 6d2d2b094941..000000000000
--- a/Documentation/ABI/testing/sysfs-bus-iio-adc-max9611
+++ /dev/null
@@ -1,17 +0,0 @@
-What: /sys/bus/iio/devices/iio:deviceX/in_power_shunt_resistor
-Date: March 2017
-KernelVersion: 4.12
-Contact: linux-iio@vger.kernel.org
-Description: The value of the shunt resistor used to compute power drain on
- common input voltage pin (RS+). In Ohms.
-
-What: /sys/bus/iio/devices/iio:deviceX/in_current_shunt_resistor
-Date: March 2017
-KernelVersion: 4.12
-Contact: linux-iio@vger.kernel.org
-Description: The value of the shunt resistor used to compute current flowing
- between RS+ and RS- voltage sense inputs. In Ohms.
-
-These attributes describe a single physical component, exposed as two distinct
-attributes as it is used to calculate two different values: power load and
-current flowing between RS+ and RS- inputs.
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-adc-pac1934 b/Documentation/ABI/testing/sysfs-bus-iio-adc-pac1934
new file mode 100644
index 000000000000..625b7f867847
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-iio-adc-pac1934
@@ -0,0 +1,9 @@
+What: /sys/bus/iio/devices/iio:deviceX/in_shunt_resistorY
+KernelVersion: 6.7
+Contact: linux-iio@vger.kernel.org
+Description:
+ The value of the shunt resistor may be known only at runtime
+ and set by a client application. This attribute allows to
+ set its value in micro-ohms. X is the IIO index of the device.
+ Y is the channel number. The value is used to calculate
+ current, power and accumulated energy.
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-chemical-sgp40 b/Documentation/ABI/testing/sysfs-bus-iio-chemical-sgp40
index 469a7c00fad4..a95547e874f1 100644
--- a/Documentation/ABI/testing/sysfs-bus-iio-chemical-sgp40
+++ b/Documentation/ABI/testing/sysfs-bus-iio-chemical-sgp40
@@ -15,17 +15,3 @@ Description:
Set the relative humidity. This value is sent to the sensor for
humidity compensation.
Default value: 50000 (50 % relative humidity)
-
-What: /sys/bus/iio/devices/iio:deviceX/in_resistance_calibbias
-Date: August 2021
-KernelVersion: 5.15
-Contact: Andreas Klinger <ak@it-klinger.de>
-Description:
- Set the bias value for the resistance which is used for
- calculation of in_concentration_input as follows:
-
- x = (in_resistance_raw - in_resistance_calibbias) * 0.65
-
- in_concentration_input = 500 / (1 + e^x)
-
- Default value: 30000
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-cros-ec b/Documentation/ABI/testing/sysfs-bus-iio-cros-ec
index adf24c40126f..9e3926243797 100644
--- a/Documentation/ABI/testing/sysfs-bus-iio-cros-ec
+++ b/Documentation/ABI/testing/sysfs-bus-iio-cros-ec
@@ -7,16 +7,6 @@ Description:
corresponding calibration offsets can be read from `*_calibbias`
entries.
-What: /sys/bus/iio/devices/iio:deviceX/location
-Date: July 2015
-KernelVersion: 4.7
-Contact: linux-iio@vger.kernel.org
-Description:
- This attribute returns a string with the physical location where
- the motion sensor is placed. For example, in a laptop a motion
- sensor can be located on the base or on the lid. Current valid
- values are 'base' and 'lid'.
-
What: /sys/bus/iio/devices/iio:deviceX/id
Date: September 2017
KernelVersion: 4.14
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-dac b/Documentation/ABI/testing/sysfs-bus-iio-dac
new file mode 100644
index 000000000000..810eaac5533c
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-iio-dac
@@ -0,0 +1,61 @@
+What: /sys/bus/iio/devices/iio:deviceX/out_currentY_toggle_en
+KernelVersion: 5.18
+Contact: linux-iio@vger.kernel.org
+Description:
+ Toggle enable. Write 1 to enable toggle or 0 to disable it. This
+ is useful when one wants to change the DAC output codes. For
+ autonomous toggling, the way it should be done is:
+
+ - disable toggle operation;
+ - change out_currentY_rawN, where N is the integer value of the symbol;
+ - enable toggle operation.
+
+What: /sys/bus/iio/devices/iio:deviceX/out_currentY_rawN
+KernelVersion: 5.18
+Contact: linux-iio@vger.kernel.org
+Description:
+ This attribute has the same meaning as out_currentY_raw. It is
+ specific to toggle enabled channels and refers to the DAC output
+ code in INPUT_N (_rawN), where N is the integer value of the symbol.
+ The same scale and offset as in out_currentY_raw applies.
+
+What: /sys/bus/iio/devices/iio:deviceX/out_currentY_symbol
+KernelVersion: 5.18
+Contact: linux-iio@vger.kernel.org
+Description:
+ Performs a SW switch to a predefined output symbol. This attribute
+ is specific to toggle enabled channels and allows switching between
+ multiple predefined symbols. Each symbol corresponds to a different
+ output, denoted as out_currentY_rawN, where N is the integer value
+ of the symbol. Writing an integer value N will select out_currentY_rawN.
+
+What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_toggle_en
+KernelVersion: 5.18
+Contact: linux-iio@vger.kernel.org
+Description:
+ Toggle enable. Write 1 to enable toggle or 0 to disable it. This
+ is useful when one wants to change the DAC output codes. For
+ autonomous toggling, the way it should be done is:
+
+ - disable toggle operation;
+ - change out_voltageY_rawN, where N is the integer value of the symbol;
+ - enable toggle operation.
+
+What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_rawN
+KernelVersion: 5.18
+Contact: linux-iio@vger.kernel.org
+Description:
+ This attribute has the same meaning as out_currentY_raw. It is
+ specific to toggle enabled channels and refers to the DAC output
+ code in INPUT_N (_rawN), where N is the integer value of the symbol.
+ The same scale and offset as in out_currentY_raw applies.
+
+What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_symbol
+KernelVersion: 5.18
+Contact: linux-iio@vger.kernel.org
+Description:
+ Performs a SW switch to a predefined output symbol. This attribute
+ is specific to toggle enabled channels and allows switching between
+ multiple predefined symbols. Each symbol corresponds to a different
+ output, denoted as out_voltageY_rawN, where N is the integer value
+ of the symbol. Writing an integer value N will select out_voltageY_rawN.
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-dac-ltc2688 b/Documentation/ABI/testing/sysfs-bus-iio-dac-ltc2688
index 1c35971277ba..ae95a5477382 100644
--- a/Documentation/ABI/testing/sysfs-bus-iio-dac-ltc2688
+++ b/Documentation/ABI/testing/sysfs-bus-iio-dac-ltc2688
@@ -53,34 +53,3 @@ KernelVersion: 5.18
Contact: linux-iio@vger.kernel.org
Description:
Returns the available values for the dither phase.
-
-What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_toggle_en
-KernelVersion: 5.18
-Contact: linux-iio@vger.kernel.org
-Description:
- Toggle enable. Write 1 to enable toggle or 0 to disable it. This is
- useful when one wants to change the DAC output codes. The way it should
- be done is:
-
- - disable toggle operation;
- - change out_voltageY_raw0 and out_voltageY_raw1;
- - enable toggle operation.
-
-What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_raw0
-What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_raw1
-KernelVersion: 5.18
-Contact: linux-iio@vger.kernel.org
-Description:
- It has the same meaning as out_voltageY_raw. This attribute is
- specific to toggle enabled channels and refers to the DAC output
- code in INPUT_A (_raw0) and INPUT_B (_raw1). The same scale and offset
- as in out_voltageY_raw applies.
-
-What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_symbol
-KernelVersion: 5.18
-Contact: linux-iio@vger.kernel.org
-Description:
- Performs a SW toggle. This attribute is specific to toggle
- enabled channels and allows to toggle between out_voltageY_raw0
- and out_voltageY_raw1 through software. Writing 0 will select
- out_voltageY_raw0 while 1 selects out_voltageY_raw1.
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-filter-admv8818 b/Documentation/ABI/testing/sysfs-bus-iio-filter-admv8818
index 31dbb390573f..c431f0a13cf5 100644
--- a/Documentation/ABI/testing/sysfs-bus-iio-filter-admv8818
+++ b/Documentation/ABI/testing/sysfs-bus-iio-filter-admv8818
@@ -3,7 +3,7 @@ KernelVersion:
Contact: linux-iio@vger.kernel.org
Description:
Reading this returns the valid values that can be written to the
- on_altvoltage0_mode attribute:
+ filter_mode attribute:
- auto -> Adjust bandpass filter to track changes in input clock rate.
- manual -> disable/unregister the clock rate notifier / input clock tracking.
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-frequency-admv1013 b/Documentation/ABI/testing/sysfs-bus-iio-frequency-admv1013
index de1e323e5d47..9cf8cd0dd2df 100644
--- a/Documentation/ABI/testing/sysfs-bus-iio-frequency-admv1013
+++ b/Documentation/ABI/testing/sysfs-bus-iio-frequency-admv1013
@@ -1,10 +1,10 @@
-What: /sys/bus/iio/devices/iio:deviceX/in_altvoltage0-1_i_calibphase
+What: /sys/bus/iio/devices/iio:deviceX/in_altvoltage0-altvoltage1_i_calibphase
KernelVersion:
Contact: linux-iio@vger.kernel.org
Description:
Read/write unscaled value for the Local Oscillatior path quadrature I phase shift.
-What: /sys/bus/iio/devices/iio:deviceX/in_altvoltage0-1_q_calibphase
+What: /sys/bus/iio/devices/iio:deviceX/in_altvoltage0-altvoltage1_q_calibphase
KernelVersion:
Contact: linux-iio@vger.kernel.org
Description:
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-ina2xx-adc b/Documentation/ABI/testing/sysfs-bus-iio-ina2xx-adc
index 8916f7ec6507..8dbca113112d 100644
--- a/Documentation/ABI/testing/sysfs-bus-iio-ina2xx-adc
+++ b/Documentation/ABI/testing/sysfs-bus-iio-ina2xx-adc
@@ -13,12 +13,3 @@ Description:
available for reading data. However, samples can be occasionally skipped
or repeated, depending on the beat between the capture and conversion
rates.
-
-What: /sys/bus/iio/devices/iio:deviceX/in_shunt_resistor
-Date: December 2015
-KernelVersion: 4.4
-Contact: linux-iio@vger.kernel.org
-Description:
- The value of the shunt resistor may be known only at runtime fom an
- eeprom content read by a client application. This attribute allows to
- set its value in ohms.
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-inv_icm42600 b/Documentation/ABI/testing/sysfs-bus-iio-inv_icm42600
new file mode 100644
index 000000000000..7eeacfb7650d
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-iio-inv_icm42600
@@ -0,0 +1,18 @@
+What: /sys/bus/iio/devices/iio:deviceX/in_accel_power_mode
+KernelVersion: 6.11
+Contact: linux-iio@vger.kernel.org
+Description:
+ Accelerometer power mode. Setting this attribute will set the
+ requested power mode to use if the ODR support it. If ODR
+ support only 1 mode, power mode will be enforced.
+ Reading this attribute will return the current accelerometer
+ power mode if the sensor is on, or the requested value if the
+ sensor is off. The value between real and requested value can
+ be different for ODR supporting only 1 mode.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_accel_power_mode_available
+KernelVersion: 6.11
+Contact: linux-iio@vger.kernel.org
+Description:
+ List of available accelerometer power modes that can be set in
+ in_accel_power_mode attribute.
diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci
index ecf47559f495..b767db2c52cb 100644
--- a/Documentation/ABI/testing/sysfs-bus-pci
+++ b/Documentation/ABI/testing/sysfs-bus-pci
@@ -163,6 +163,17 @@ Description:
will be present in sysfs. Writing 1 to this file
will perform reset.
+What: /sys/bus/pci/devices/.../reset_subordinate
+Date: October 2024
+Contact: linux-pci@vger.kernel.org
+Description:
+ This is visible only for bridge devices. If you want to reset
+ all devices attached through the subordinate bus of a specific
+ bridge device, writing 1 to this will try to do it. This will
+ affect all devices attached to the system through this bridge
+ similiar to writing 1 to their individual "reset" file, so use
+ with caution.
+
What: /sys/bus/pci/devices/.../vpd
Date: February 2008
Contact: Ben Hutchings <bwh@kernel.org>
@@ -500,3 +511,194 @@ Description:
console drivers from the device. Raw users of pci-sysfs
resourceN attributes must be terminated prior to resizing.
Success of the resizing operation is not guaranteed.
+
+What: /sys/bus/pci/devices/.../leds/*:enclosure:*/brightness
+What: /sys/class/leds/*:enclosure:*/brightness
+Date: August 2024
+KernelVersion: 6.12
+Description:
+ LED indications on PCIe storage enclosures which are controlled
+ through the NPEM interface (Native PCIe Enclosure Management,
+ PCIe r6.1 sec 6.28) are accessible as led class devices, both
+ below /sys/class/leds and below NPEM-capable PCI devices.
+
+ Although these led class devices could be manipulated manually,
+ in practice they are typically manipulated automatically by an
+ application such as ledmon(8).
+
+ The name of a led class device is as follows:
+ <bdf>:enclosure:<indication>
+ where:
+
+ - <bdf> is the domain, bus, device and function number
+ (e.g. 10000:02:05.0)
+ - <indication> is a short description of the LED indication
+
+ Valid indications per PCIe r6.1 table 6-27 are:
+
+ - ok (drive is functioning normally)
+ - locate (drive is being identified by an admin)
+ - fail (drive is not functioning properly)
+ - rebuild (drive is part of an array that is rebuilding)
+ - pfa (drive is predicted to fail soon)
+ - hotspare (drive is marked to be used as a replacement)
+ - ica (drive is part of an array that is degraded)
+ - ifa (drive is part of an array that is failed)
+ - idt (drive is not the right type for the connector)
+ - disabled (drive is disabled, removal is safe)
+ - specific0 to specific7 (enclosure-specific indications)
+
+ Broadly, the indications fall into one of these categories:
+
+ - to signify drive state (ok, locate, fail, idt, disabled)
+ - to signify drive role or state in a software RAID array
+ (rebuild, pfa, hotspare, ica, ifa)
+ - to signify any other role or state (specific0 to specific7)
+
+ Mandatory indications per PCIe r6.1 sec 7.9.19.2 comprise:
+ ok, locate, fail, rebuild. All others are optional.
+ A led class device is only visible if the corresponding
+ indication is supported by the device.
+
+ To manipulate the indications, write 0 (LED_OFF) or 1 (LED_ON)
+ to the "brightness" file. Note that manipulating an indication
+ may implicitly manipulate other indications at the vendor's
+ discretion. E.g. when the user lights up the "ok" indication,
+ the vendor may choose to automatically turn off the "fail"
+ indication. The current state of an indication can be
+ retrieved by reading its "brightness" file.
+
+ The PCIe Base Specification allows vendors leeway to choose
+ different colors or blinking patterns for the indications,
+ but they typically follow the IBPI standard. E.g. the "locate"
+ indication is usually presented as one or two LEDs blinking at
+ 4 Hz frequency:
+ https://en.wikipedia.org/wiki/International_Blinking_Pattern_Interpretation
+
+ PCI Firmware Specification r3.3 sec 4.7 defines a DSM interface
+ to facilitate shared access by operating system and platform
+ firmware to a device's NPEM registers. The kernel will use
+ this DSM interface where available, instead of accessing NPEM
+ registers directly. The DSM interface does not support the
+ enclosure-specific indications "specific0" to "specific7",
+ hence the corresponding led class devices are unavailable if
+ the DSM interface is used.
+
+What: /sys/bus/pci/devices/.../doe_features
+Date: March 2025
+Contact: Linux PCI developers <linux-pci@vger.kernel.org>
+Description:
+ This directory contains a list of the supported Data Object
+ Exchange (DOE) features. The features are the file name.
+ The contents of each file is the raw Vendor ID and data
+ object feature values.
+
+ The value comes from the device and specifies the vendor and
+ data object type supported. The lower (RHS of the colon) is
+ the data object type in hex. The upper (LHS of the colon)
+ is the vendor ID.
+
+ As all DOE devices must support the DOE discovery feature,
+ if DOE is supported you will at least see the doe_discovery
+ file, with this contents:
+
+ # cat doe_features/doe_discovery
+ 0001:00
+
+ If the device supports other features you will see other
+ files as well. For example if CMA/SPDM and secure CMA/SPDM
+ are supported the doe_features directory will look like
+ this:
+
+ # ls doe_features
+ 0001:01 0001:02 doe_discovery
+
+What: /sys/bus/pci/devices/.../serial_number
+Date: December 2025
+Contact: Matthew Wood <thepacketgeek@gmail.com>
+Description:
+ This is visible only for PCI devices that support the serial
+ number extended capability. The file is read only and due to
+ the possible sensitivity of accessible serial numbers, admin
+ only.
+
+What: /sys/bus/pci/devices/.../tsm/
+Contact: linux-coco@lists.linux.dev
+Description:
+ This directory only appears if a physical device function
+ supports authentication (PCIe CMA-SPDM), interface security
+ (PCIe TDISP), and is accepted for secure operation by the
+ platform TSM driver. This attribute directory appears
+ dynamically after the platform TSM driver loads. So, only after
+ the /sys/class/tsm/tsm0 device arrives can tools assume that
+ devices without a tsm/ attribute directory will never have one;
+ before that, the security capabilities of the device relative to
+ the platform TSM are unknown. See
+ Documentation/ABI/testing/sysfs-class-tsm.
+
+What: /sys/bus/pci/devices/.../tsm/connect
+Contact: linux-coco@lists.linux.dev
+Description:
+ (RW) Write the name of a TSM (TEE Security Manager) device from
+ /sys/class/tsm to this file to establish a connection with the
+ device. This typically includes an SPDM (DMTF Security
+ Protocols and Data Models) session over PCIe DOE (Data Object
+ Exchange) and may also include PCIe IDE (Integrity and Data
+ Encryption) establishment. Reads from this attribute return the
+ name of the connected TSM or the empty string if not
+ connected. A TSM device signals its readiness to accept PCI
+ connection via a KOBJ_CHANGE event.
+
+What: /sys/bus/pci/devices/.../tsm/disconnect
+Contact: linux-coco@lists.linux.dev
+Description:
+ (WO) Write the name of the TSM device that was specified
+ to 'connect' to teardown the connection.
+
+What: /sys/bus/pci/devices/.../tsm/dsm
+Contact: linux-coco@lists.linux.dev
+Description: (RO) Return PCI device name of this device's DSM (Device
+ Security Manager). When a device is in the connected state it
+ indicates that the platform TSM (TEE Security Manager) has made
+ a secure-session connection with a device's DSM. A DSM is always
+ physical function 0 and when the device supports TDISP (TEE
+ Device Interface Security Protocol) its managed functions also
+ populate this tsm/dsm attribute. The managed functions of a DSM
+ are SR-IOV (Single Root I/O Virtualization) virtual functions,
+ non-zero functions of a multi-function device, or downstream
+ endpoints depending on whether the DSM is an SR-IOV physical
+ function, function0 of a multi-function device, or an upstream
+ PCIe switch port. This is a "link" TSM attribute, see
+ Documentation/ABI/testing/sysfs-class-tsm.
+
+What: /sys/bus/pci/devices/.../tsm/bound
+Contact: linux-coco@lists.linux.dev
+Description: (RO) Return the device name of the TSM when the device is in a
+ TDISP (TEE Device Interface Security Protocol) operational state
+ (LOCKED, RUN, or ERROR, not UNLOCKED). Bound devices consume
+ platform TSM resources and depend on the device's configuration
+ (e.g. BME (Bus Master Enable) and MSE (Memory Space Enable)
+ among other settings) to remain stable for the duration of the
+ bound state. This attribute is only visible for devices that
+ support TDISP operation, and it is only populated after
+ successful connect and TSM bind. The TSM bind operation is
+ initiated by VFIO/IOMMUFD. This is a "link" TSM attribute, see
+ Documentation/ABI/testing/sysfs-class-tsm.
+
+What: /sys/bus/pci/devices/.../authenticated
+Contact: linux-pci@vger.kernel.org
+Description:
+ When the device's tsm/ directory is present device
+ authentication (PCIe CMA-SPDM) and link encryption (PCIe IDE)
+ are handled by the platform TSM (TEE Security Manager). When the
+ tsm/ directory is not present this attribute reflects only the
+ native CMA-SPDM authentication state with the kernel's
+ certificate store.
+
+ If the attribute is not present, it indicates that
+ authentication is unsupported by the device, or the TSM has no
+ available authentication methods for the device.
+
+ When present and the tsm/ attribute directory is present, the
+ authenticated attribute is an alias for the device 'connect'
+ state. See the 'tsm/connect' attribute for more details.
diff --git a/Documentation/ABI/testing/sysfs-bus-pci-devices-aer b/Documentation/ABI/testing/sysfs-bus-pci-devices-aer
new file mode 100644
index 000000000000..5ed284523956
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-pci-devices-aer
@@ -0,0 +1,163 @@
+PCIe Device AER statistics
+--------------------------
+
+These attributes show up under all the devices that are AER capable. These
+statistical counters indicate the errors "as seen/reported by the device".
+Note that this may mean that if an endpoint is causing problems, the AER
+counters may increment at its link partner (e.g. root port) because the
+errors may be "seen" / reported by the link partner and not the
+problematic endpoint itself (which may report all counters as 0 as it never
+saw any problems).
+
+What: /sys/bus/pci/devices/<dev>/aer_dev_correctable
+Date: July 2018
+KernelVersion: 4.19.0
+Contact: linux-pci@vger.kernel.org, rajatja@google.com
+Description: List of correctable errors seen and reported by this
+ PCI device using ERR_COR. Note that since multiple errors may
+ be reported using a single ERR_COR message, thus
+ TOTAL_ERR_COR at the end of the file may not match the actual
+ total of all the errors in the file. Sample output::
+
+ localhost /sys/devices/pci0000:00/0000:00:1c.0 # cat aer_dev_correctable
+ Receiver Error 2
+ Bad TLP 0
+ Bad DLLP 0
+ RELAY_NUM Rollover 0
+ Replay Timer Timeout 0
+ Advisory Non-Fatal 0
+ Corrected Internal Error 0
+ Header Log Overflow 0
+ TOTAL_ERR_COR 2
+
+What: /sys/bus/pci/devices/<dev>/aer_dev_fatal
+Date: July 2018
+KernelVersion: 4.19.0
+Contact: linux-pci@vger.kernel.org, rajatja@google.com
+Description: List of uncorrectable fatal errors seen and reported by this
+ PCI device using ERR_FATAL. Note that since multiple errors may
+ be reported using a single ERR_FATAL message, thus
+ TOTAL_ERR_FATAL at the end of the file may not match the actual
+ total of all the errors in the file. Sample output::
+
+ localhost /sys/devices/pci0000:00/0000:00:1c.0 # cat aer_dev_fatal
+ Undefined 0
+ Data Link Protocol 0
+ Surprise Down Error 0
+ Poisoned TLP 0
+ Flow Control Protocol 0
+ Completion Timeout 0
+ Completer Abort 0
+ Unexpected Completion 0
+ Receiver Overflow 0
+ Malformed TLP 0
+ ECRC 0
+ Unsupported Request 0
+ ACS Violation 0
+ Uncorrectable Internal Error 0
+ MC Blocked TLP 0
+ AtomicOp Egress Blocked 0
+ TLP Prefix Blocked Error 0
+ TOTAL_ERR_FATAL 0
+
+What: /sys/bus/pci/devices/<dev>/aer_dev_nonfatal
+Date: July 2018
+KernelVersion: 4.19.0
+Contact: linux-pci@vger.kernel.org, rajatja@google.com
+Description: List of uncorrectable nonfatal errors seen and reported by this
+ PCI device using ERR_NONFATAL. Note that since multiple errors
+ may be reported using a single ERR_FATAL message, thus
+ TOTAL_ERR_NONFATAL at the end of the file may not match the
+ actual total of all the errors in the file. Sample output::
+
+ localhost /sys/devices/pci0000:00/0000:00:1c.0 # cat aer_dev_nonfatal
+ Undefined 0
+ Data Link Protocol 0
+ Surprise Down Error 0
+ Poisoned TLP 0
+ Flow Control Protocol 0
+ Completion Timeout 0
+ Completer Abort 0
+ Unexpected Completion 0
+ Receiver Overflow 0
+ Malformed TLP 0
+ ECRC 0
+ Unsupported Request 0
+ ACS Violation 0
+ Uncorrectable Internal Error 0
+ MC Blocked TLP 0
+ AtomicOp Egress Blocked 0
+ TLP Prefix Blocked Error 0
+ TOTAL_ERR_NONFATAL 0
+
+PCIe Rootport AER statistics
+----------------------------
+
+These attributes show up under only the rootports (or root complex event
+collectors) that are AER capable. These indicate the number of error messages as
+"reported to" the rootport. Please note that the rootports also transmit
+(internally) the ERR_* messages for errors seen by the internal rootport PCI
+device, so these counters include them and are thus cumulative of all the error
+messages on the PCI hierarchy originating at that root port.
+
+What: /sys/bus/pci/devices/<dev>/aer_rootport_total_err_cor
+Date: July 2018
+KernelVersion: 4.19.0
+Contact: linux-pci@vger.kernel.org, rajatja@google.com
+Description: Total number of ERR_COR messages reported to rootport.
+
+What: /sys/bus/pci/devices/<dev>/aer_rootport_total_err_fatal
+Date: July 2018
+KernelVersion: 4.19.0
+Contact: linux-pci@vger.kernel.org, rajatja@google.com
+Description: Total number of ERR_FATAL messages reported to rootport.
+
+What: /sys/bus/pci/devices/<dev>/aer_rootport_total_err_nonfatal
+Date: July 2018
+KernelVersion: 4.19.0
+Contact: linux-pci@vger.kernel.org, rajatja@google.com
+Description: Total number of ERR_NONFATAL messages reported to rootport.
+
+PCIe AER ratelimits
+-------------------
+
+These attributes show up under all the devices that are AER capable.
+They represent configurable ratelimits of logs per error type.
+
+See Documentation/PCI/pcieaer-howto.rst for more info on ratelimits.
+
+What: /sys/bus/pci/devices/<dev>/aer/correctable_ratelimit_interval_ms
+Date: May 2025
+KernelVersion: 6.16.0
+Contact: linux-pci@vger.kernel.org
+Description: Writing 0 disables AER correctable error log ratelimiting.
+ Writing a positive value sets the ratelimit interval in ms.
+ Default is DEFAULT_RATELIMIT_INTERVAL (5000 ms).
+
+What: /sys/bus/pci/devices/<dev>/aer/correctable_ratelimit_burst
+Date: May 2025
+KernelVersion: 6.16.0
+Contact: linux-pci@vger.kernel.org
+Description: Ratelimit burst for correctable error logs. Writing a value
+ changes the number of errors (burst) allowed per interval
+ before ratelimiting. Reading gets the current ratelimit
+ burst. Default is DEFAULT_RATELIMIT_BURST (10).
+
+What: /sys/bus/pci/devices/<dev>/aer/nonfatal_ratelimit_interval_ms
+Date: May 2025
+KernelVersion: 6.16.0
+Contact: linux-pci@vger.kernel.org
+Description: Writing 0 disables AER non-fatal uncorrectable error log
+ ratelimiting. Writing a positive value sets the ratelimit
+ interval in ms. Default is DEFAULT_RATELIMIT_INTERVAL
+ (5000 ms).
+
+What: /sys/bus/pci/devices/<dev>/aer/nonfatal_ratelimit_burst
+Date: May 2025
+KernelVersion: 6.16.0
+Contact: linux-pci@vger.kernel.org
+Description: Ratelimit burst for non-fatal uncorrectable error logs.
+ Writing a value changes the number of errors (burst)
+ allowed per interval before ratelimiting. Reading gets the
+ current ratelimit burst. Default is DEFAULT_RATELIMIT_BURST
+ (10).
diff --git a/Documentation/ABI/testing/sysfs-bus-pci-devices-aer_stats b/Documentation/ABI/testing/sysfs-bus-pci-devices-aer_stats
deleted file mode 100644
index 860db53037a5..000000000000
--- a/Documentation/ABI/testing/sysfs-bus-pci-devices-aer_stats
+++ /dev/null
@@ -1,119 +0,0 @@
-PCIe Device AER statistics
---------------------------
-
-These attributes show up under all the devices that are AER capable. These
-statistical counters indicate the errors "as seen/reported by the device".
-Note that this may mean that if an endpoint is causing problems, the AER
-counters may increment at its link partner (e.g. root port) because the
-errors may be "seen" / reported by the link partner and not the
-problematic endpoint itself (which may report all counters as 0 as it never
-saw any problems).
-
-What: /sys/bus/pci/devices/<dev>/aer_dev_correctable
-Date: July 2018
-KernelVersion: 4.19.0
-Contact: linux-pci@vger.kernel.org, rajatja@google.com
-Description: List of correctable errors seen and reported by this
- PCI device using ERR_COR. Note that since multiple errors may
- be reported using a single ERR_COR message, thus
- TOTAL_ERR_COR at the end of the file may not match the actual
- total of all the errors in the file. Sample output::
-
- localhost /sys/devices/pci0000:00/0000:00:1c.0 # cat aer_dev_correctable
- Receiver Error 2
- Bad TLP 0
- Bad DLLP 0
- RELAY_NUM Rollover 0
- Replay Timer Timeout 0
- Advisory Non-Fatal 0
- Corrected Internal Error 0
- Header Log Overflow 0
- TOTAL_ERR_COR 2
-
-What: /sys/bus/pci/devices/<dev>/aer_dev_fatal
-Date: July 2018
-KernelVersion: 4.19.0
-Contact: linux-pci@vger.kernel.org, rajatja@google.com
-Description: List of uncorrectable fatal errors seen and reported by this
- PCI device using ERR_FATAL. Note that since multiple errors may
- be reported using a single ERR_FATAL message, thus
- TOTAL_ERR_FATAL at the end of the file may not match the actual
- total of all the errors in the file. Sample output::
-
- localhost /sys/devices/pci0000:00/0000:00:1c.0 # cat aer_dev_fatal
- Undefined 0
- Data Link Protocol 0
- Surprise Down Error 0
- Poisoned TLP 0
- Flow Control Protocol 0
- Completion Timeout 0
- Completer Abort 0
- Unexpected Completion 0
- Receiver Overflow 0
- Malformed TLP 0
- ECRC 0
- Unsupported Request 0
- ACS Violation 0
- Uncorrectable Internal Error 0
- MC Blocked TLP 0
- AtomicOp Egress Blocked 0
- TLP Prefix Blocked Error 0
- TOTAL_ERR_FATAL 0
-
-What: /sys/bus/pci/devices/<dev>/aer_dev_nonfatal
-Date: July 2018
-KernelVersion: 4.19.0
-Contact: linux-pci@vger.kernel.org, rajatja@google.com
-Description: List of uncorrectable nonfatal errors seen and reported by this
- PCI device using ERR_NONFATAL. Note that since multiple errors
- may be reported using a single ERR_FATAL message, thus
- TOTAL_ERR_NONFATAL at the end of the file may not match the
- actual total of all the errors in the file. Sample output::
-
- localhost /sys/devices/pci0000:00/0000:00:1c.0 # cat aer_dev_nonfatal
- Undefined 0
- Data Link Protocol 0
- Surprise Down Error 0
- Poisoned TLP 0
- Flow Control Protocol 0
- Completion Timeout 0
- Completer Abort 0
- Unexpected Completion 0
- Receiver Overflow 0
- Malformed TLP 0
- ECRC 0
- Unsupported Request 0
- ACS Violation 0
- Uncorrectable Internal Error 0
- MC Blocked TLP 0
- AtomicOp Egress Blocked 0
- TLP Prefix Blocked Error 0
- TOTAL_ERR_NONFATAL 0
-
-PCIe Rootport AER statistics
-----------------------------
-
-These attributes show up under only the rootports (or root complex event
-collectors) that are AER capable. These indicate the number of error messages as
-"reported to" the rootport. Please note that the rootports also transmit
-(internally) the ERR_* messages for errors seen by the internal rootport PCI
-device, so these counters include them and are thus cumulative of all the error
-messages on the PCI hierarchy originating at that root port.
-
-What: /sys/bus/pci/devices/<dev>/aer_stats/aer_rootport_total_err_cor
-Date: July 2018
-KernelVersion: 4.19.0
-Contact: linux-pci@vger.kernel.org, rajatja@google.com
-Description: Total number of ERR_COR messages reported to rootport.
-
-What: /sys/bus/pci/devices/<dev>/aer_stats/aer_rootport_total_err_fatal
-Date: July 2018
-KernelVersion: 4.19.0
-Contact: linux-pci@vger.kernel.org, rajatja@google.com
-Description: Total number of ERR_FATAL messages reported to rootport.
-
-What: /sys/bus/pci/devices/<dev>/aer_stats/aer_rootport_total_err_nonfatal
-Date: July 2018
-KernelVersion: 4.19.0
-Contact: linux-pci@vger.kernel.org, rajatja@google.com
-Description: Total number of ERR_NONFATAL messages reported to rootport.
diff --git a/Documentation/ABI/testing/sysfs-bus-pci-devices-avs b/Documentation/ABI/testing/sysfs-bus-pci-devices-avs
new file mode 100644
index 000000000000..ebff3fa12055
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-pci-devices-avs
@@ -0,0 +1,8 @@
+What: /sys/devices/pci0000:00/<dev>/avs/fw_version
+Date: February 2024
+Contact: Cezary Rojewski <cezary.rojewski@intel.com>
+Description:
+ Version of AudioDSP firmware ASoC avs driver is communicating
+ with.
+
+ Format: %d.%d.%d.%d, type:major:minor:build.
diff --git a/Documentation/ABI/testing/sysfs-bus-pci-drivers-xhci_hcd b/Documentation/ABI/testing/sysfs-bus-pci-drivers-xhci_hcd
index 5a775b8f6543..fc82aa4e54b0 100644
--- a/Documentation/ABI/testing/sysfs-bus-pci-drivers-xhci_hcd
+++ b/Documentation/ABI/testing/sysfs-bus-pci-drivers-xhci_hcd
@@ -75,3 +75,13 @@ Description:
The default value is 1 (GNU Remote Debug command).
Other permissible value is 0 which is for vendor defined debug
target.
+
+What: /sys/bus/pci/drivers/xhci_hcd/.../dbc_poll_interval_ms
+Date: February 2024
+Contact: Mathias Nyman <mathias.nyman@linux.intel.com>
+Description:
+ This attribute adjust the polling interval used to check for
+ DbC events. Unit is milliseconds. Accepted values range from 0
+ up to 5000. The default value is 64 ms.
+ This polling interval is used while DbC is enabled but has no
+ active data transfers.
diff --git a/Documentation/ABI/testing/sysfs-bus-platform-drivers-amd_x3d_vcache b/Documentation/ABI/testing/sysfs-bus-platform-drivers-amd_x3d_vcache
new file mode 100644
index 000000000000..ac3431736f5c
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-platform-drivers-amd_x3d_vcache
@@ -0,0 +1,12 @@
+What: /sys/bus/platform/drivers/amd_x3d_vcache/AMDI0101:00/amd_x3d_mode
+Date: November 2024
+KernelVersion: 6.13
+Contact: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
+Description: (RW) AMD 3D V-Cache optimizer allows users to switch CPU core
+ rankings dynamically.
+
+ This file switches between these two modes:
+ - "frequency" cores within the faster CCD are prioritized before
+ those in the slower CCD.
+ - "cache" cores within the larger L3 CCD are prioritized before
+ those in the smaller L3 CCD.
diff --git a/Documentation/ABI/testing/sysfs-bus-platform-onboard-usb-dev b/Documentation/ABI/testing/sysfs-bus-platform-onboard-usb-dev
new file mode 100644
index 000000000000..b06a48c3c85a
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-platform-onboard-usb-dev
@@ -0,0 +1,9 @@
+What: /sys/bus/platform/devices/<dev>/always_powered_in_suspend
+Date: June 2022
+KernelVersion: 5.20
+Contact: Matthias Kaehlcke <matthias@kaehlcke.net>
+ linux-usb@vger.kernel.org
+Description:
+ (RW) Controls whether the USB hub remains always powered
+ during system suspend or not. This attribute is not
+ available for non-hub devices.
diff --git a/Documentation/ABI/testing/sysfs-bus-platform-onboard-usb-hub b/Documentation/ABI/testing/sysfs-bus-platform-onboard-usb-hub
deleted file mode 100644
index 42deb0552065..000000000000
--- a/Documentation/ABI/testing/sysfs-bus-platform-onboard-usb-hub
+++ /dev/null
@@ -1,8 +0,0 @@
-What: /sys/bus/platform/devices/<dev>/always_powered_in_suspend
-Date: June 2022
-KernelVersion: 5.20
-Contact: Matthias Kaehlcke <matthias@kaehlcke.net>
- linux-usb@vger.kernel.org
-Description:
- (RW) Controls whether the USB hub remains always powered
- during system suspend or not. \ No newline at end of file
diff --git a/Documentation/ABI/testing/sysfs-bus-usb b/Documentation/ABI/testing/sysfs-bus-usb
index 2b7108e21977..af9b653422f1 100644
--- a/Documentation/ABI/testing/sysfs-bus-usb
+++ b/Documentation/ABI/testing/sysfs-bus-usb
@@ -442,6 +442,16 @@ What: /sys/bus/usb/devices/usbX/descriptors
Description:
Contains the interface descriptors, in binary.
+What: /sys/bus/usb/devices/usbX/bos_descriptors
+Date: March 2024
+Contact: Elbert Mai <code@elbertmai.com>
+Description:
+ Binary file containing the cached binary device object store (BOS)
+ of the device. This consists of the BOS descriptor followed by the
+ set of device capability descriptors. All descriptors read from
+ this file are in bus-endian format. Note that the kernel will not
+ request the BOS from a device if its bcdUSB is less than 0x0201.
+
What: /sys/bus/usb/devices/usbX/idProduct
Description:
Product ID, in hexadecimal.
diff --git a/Documentation/ABI/testing/sysfs-bus-vdpa b/Documentation/ABI/testing/sysfs-bus-vdpa
index 4da53878bff6..2c833b5163f2 100644
--- a/Documentation/ABI/testing/sysfs-bus-vdpa
+++ b/Documentation/ABI/testing/sysfs-bus-vdpa
@@ -1,6 +1,6 @@
What: /sys/bus/vdpa/drivers_autoprobe
Date: March 2020
-Contact: virtualization@lists.linux-foundation.org
+Contact: virtualization@lists.linux.dev
Description:
This file determines whether new devices are immediately bound
to a driver after the creation. It initially contains 1, which
@@ -12,7 +12,7 @@ Description:
What: /sys/bus/vdpa/driver_probe
Date: March 2020
-Contact: virtualization@lists.linux-foundation.org
+Contact: virtualization@lists.linux.dev
Description:
Writing a device name to this file will cause the kernel binds
devices to a compatible driver.
@@ -22,7 +22,7 @@ Description:
What: /sys/bus/vdpa/drivers/.../bind
Date: March 2020
-Contact: virtualization@lists.linux-foundation.org
+Contact: virtualization@lists.linux.dev
Description:
Writing a device name to this file will cause the driver to
attempt to bind to the device. This is useful for overriding
@@ -30,7 +30,7 @@ Description:
What: /sys/bus/vdpa/drivers/.../unbind
Date: March 2020
-Contact: virtualization@lists.linux-foundation.org
+Contact: virtualization@lists.linux.dev
Description:
Writing a device name to this file will cause the driver to
attempt to unbind from the device. This may be useful when
@@ -38,7 +38,7 @@ Description:
What: /sys/bus/vdpa/devices/.../driver_override
Date: November 2021
-Contact: virtualization@lists.linux-foundation.org
+Contact: virtualization@lists.linux.dev
Description:
This file allows the driver for a device to be specified.
When specified, only a driver with a name matching the value
diff --git a/Documentation/ABI/testing/sysfs-bus-wmi b/Documentation/ABI/testing/sysfs-bus-wmi
new file mode 100644
index 000000000000..d71a219c610e
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-wmi
@@ -0,0 +1,81 @@
+What: /sys/bus/wmi/devices/.../driver_override
+Date: February 2024
+Contact: Armin Wolf <W_Armin@gmx.de>
+Description:
+ This file allows the driver for a device to be specified which
+ will override standard ID table matching.
+ When specified, only a driver with a name matching the value
+ written to driver_override will have an opportunity to bind
+ to the device.
+ The override is specified by writing a string to the
+ driver_override file (echo wmi-event-dummy > driver_override).
+ The override may be cleared with an empty string (echo > \
+ driver_override) which returns the device to standard matching
+ rules binding.
+ Writing to driver_override does not automatically unbind the
+ device from its current driver or make any attempt to automatically
+ load the specified driver. If no driver with a matching name is
+ currently loaded in the kernel, the device will not bind to any
+ driver.
+ This also allows devices to opt-out of driver binding using a
+ driver_override name such as "none". Only a single driver may be
+ specified in the override, there is no support for parsing delimiters.
+
+What: /sys/bus/wmi/devices/.../modalias
+Date: November 20:15
+Contact: Andy Lutomirski <luto@kernel.org>
+Description:
+ This file contains the MODALIAS value emitted by uevent for a
+ given WMI device.
+
+ Format: wmi:XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX.
+
+What: /sys/bus/wmi/devices/.../guid
+Date: November 2015
+Contact: Andy Lutomirski <luto@kernel.org>
+Description:
+ This file contains the GUID used to match WMI devices to
+ compatible WMI drivers. This GUID is not necessarily unique
+ inside a given machine, it is solely used to identify the
+ interface exposed by a given WMI device.
+
+What: /sys/bus/wmi/devices/.../object_id
+Date: November 2015
+Contact: Andy Lutomirski <luto@kernel.org>
+Description:
+ This file contains the WMI object ID used internally to construct
+ the ACPI method names used by non-event WMI devices. It contains
+ two ASCII letters.
+
+What: /sys/bus/wmi/devices/.../notify_id
+Date: November 2015
+Contact: Andy Lutomirski <luto@kernel.org>
+Description:
+ This file contains the WMI notify ID used internally to map ACPI
+ events to WMI event devices. It contains two ASCII letters.
+
+What: /sys/bus/wmi/devices/.../instance_count
+Date: November 2015
+Contact: Andy Lutomirski <luto@kernel.org>
+Description:
+ This file contains the number of WMI object instances being
+ present on a given WMI device. It contains a non-negative
+ number.
+
+What: /sys/bus/wmi/devices/.../expensive
+Date: November 2015
+Contact: Andy Lutomirski <luto@kernel.org>
+Description:
+ This file contains a boolean flag signaling if interacting with
+ the given WMI device will consume significant CPU resources.
+ The WMI driver core will take care of enabling/disabling such
+ WMI devices.
+
+What: /sys/bus/wmi/devices/.../setable
+Date: May 2017
+Contact: Darren Hart (VMware) <dvhart@infradead.org>
+Description:
+ This file contains a boolean flags signaling the data block
+ associated with the given WMI device is writable. If the
+ given WMI device is not associated with a data block, then
+ this file will not exist.
diff --git a/Documentation/ABI/testing/sysfs-class-chromeos b/Documentation/ABI/testing/sysfs-class-chromeos
index 74ece942722e..7fa5be6cc774 100644
--- a/Documentation/ABI/testing/sysfs-class-chromeos
+++ b/Documentation/ABI/testing/sysfs-class-chromeos
@@ -31,3 +31,23 @@ Date: August 2015
KernelVersion: 4.2
Description:
Show the information about the EC software and hardware.
+
+What: /sys/class/chromeos/cros_ec/usbpdmuxinfo
+Date: February 2025
+Description:
+ Show PD mux status for each typec port with following flags:
+ - "USB": USB connected
+ - "DP": DP connected
+ - "POLARITY": CC line Polarity inverted
+ - "HPD_IRQ": Hot Plug Detect interrupt is asserted
+ - "HPD_LVL": Hot Plug Detect level is asserted
+ - "SAFE": DP is in safe mode
+ - "TBT": TBT enabled
+ - "USB4": USB4 enabled
+
+What: /sys/class/chromeos/cros_ec/ap_mode_entry
+Date: February 2025
+Description:
+ Show if the AP mode entry EC feature is supported.
+ It indicates whether the EC waits for direction from the AP
+ to enter Type-C altmodes or USB4 mode.
diff --git a/Documentation/ABI/testing/sysfs-class-devfreq b/Documentation/ABI/testing/sysfs-class-devfreq
index 1e7e0bb4c14e..df8ba88b9f6a 100644
--- a/Documentation/ABI/testing/sysfs-class-devfreq
+++ b/Documentation/ABI/testing/sysfs-class-devfreq
@@ -132,3 +132,12 @@ Description:
A list of governors that support the node:
- simple_ondemand
+
+What: /sys/class/devfreq/.../related_cpus
+Date: June 2025
+Contact: Linux power management list <linux-pm@vger.kernel.org>
+Description: The list of CPUs whose performance is closely related to the
+ frequency of this devfreq domain.
+
+ This file is only present if a specific devfreq device is
+ closely associated with a subset of CPUs.
diff --git a/Documentation/ABI/testing/sysfs-class-drm b/Documentation/ABI/testing/sysfs-class-drm
new file mode 100644
index 000000000000..d23fed5e29a7
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-drm
@@ -0,0 +1,8 @@
+What: /sys/class/drm/.../boot_display
+Date: January 2026
+Contact: Linux DRI developers <dri-devel@vger.kernel.org>
+Description:
+ This file indicates that displays connected to the device were
+ used to display the boot sequence. If a display connected to
+ the device was used to display the boot sequence the file will
+ be present and contain "1".
diff --git a/Documentation/ABI/testing/sysfs-class-firmware-attributes b/Documentation/ABI/testing/sysfs-class-firmware-attributes
index 9c82c7b42ff8..2713efa509b4 100644
--- a/Documentation/ABI/testing/sysfs-class-firmware-attributes
+++ b/Documentation/ABI/testing/sysfs-class-firmware-attributes
@@ -193,7 +193,7 @@ Description:
mechanism:
The means of authentication. This attribute is mandatory.
- Only supported type currently is "password".
+ Supported types are "password" or "certificate".
max_password_length:
A file that can be read to obtain the
@@ -303,6 +303,7 @@ Description:
being configured allowing anyone to make changes.
After any of these operations the system must reboot for the changes to
take effect.
+ Admin and System certificates are supported from 2025 systems onward.
certificate_thumbprint:
Read only attribute used to display the MD5, SHA1 and SHA256 thumbprints
diff --git a/Documentation/ABI/testing/sysfs-class-hwmon b/Documentation/ABI/testing/sysfs-class-hwmon
index 3dac923c9b0e..cfd0d0bab483 100644
--- a/Documentation/ABI/testing/sysfs-class-hwmon
+++ b/Documentation/ABI/testing/sysfs-class-hwmon
@@ -149,6 +149,15 @@ Description:
RW
+What: /sys/class/hwmon/hwmonX/inY_fault
+Description:
+ Reports a voltage hard failure (eg: shorted component)
+
+ - 1: Failed
+ - 0: Ok
+
+ RO
+
What: /sys/class/hwmon/hwmonX/cpuY_vid
Description:
CPU core reference voltage.
@@ -968,6 +977,15 @@ Description:
RW
+What: /sys/class/hwmon/hwmonX/humidityY_max_alarm
+Description:
+ Maximum humidity detection
+
+ - 0: OK
+ - 1: Maximum humidity detected
+
+ RO
+
What: /sys/class/hwmon/hwmonX/humidityY_max_hyst
Description:
Humidity hysteresis value for max limit.
@@ -987,6 +1005,15 @@ Description:
RW
+What: /sys/class/hwmon/hwmonX/humidityY_min_alarm
+Description:
+ Minimum humidity detection
+
+ - 0: OK
+ - 1: Minimum humidity detected
+
+ RO
+
What: /sys/class/hwmon/hwmonX/humidityY_min_hyst
Description:
Humidity hysteresis value for min limit.
diff --git a/Documentation/ABI/testing/sysfs-class-intel_pmt-features b/Documentation/ABI/testing/sysfs-class-intel_pmt-features
new file mode 100644
index 000000000000..cddb30e5bdf6
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-intel_pmt-features
@@ -0,0 +1,134 @@
+What: /sys/class/intel_pmt/features-<PCI BDF>/
+Date: 2025-04-24
+KernelVersion: 6.16
+Contact: david.e.box@linux.intel.com
+Description:
+ The `features-<PCI BDF>/` directory represents the "features"
+ capability exposed by Intel PMT (Platform Monitoring Technology)
+ for the given PCI device.
+
+ Each directory corresponds to a PMT feature and contains
+ attributes describing the available telemetry, monitoring, or
+ control functionalities.
+
+Directory Structure:
+
+ /sys/class/intel_pmt/features-<PCI BDF>/
+ ├── accelerator_telemetry/ # Per-accelerator telemetry data
+ ├── crash_log/ # Contains system crash telemetry logs
+ ├── per_core_environment_telemetry/ # Environmental telemetry per core
+ ├── per_core_performance_telemetry/ # Performance telemetry per core
+ ├── per_rmid_energy_telemetry/ # Energy telemetry for RMIDs
+ ├── per_rmid_perf_telemetry/ # Performance telemetry for RMIDs
+ ├── tpmi_control/ # TPMI-related controls and telemetry
+ ├── tracing/ # PMT tracing features
+ └── uncore_telemetry/ # Uncore telemetry data
+
+Common Files (Present in all feature directories):
+
+ caps
+ - Read-only
+ - Lists available capabilities for this feature.
+
+ guids
+ - Read-only
+ - Lists GUIDs associated with this feature.
+
+Additional Attributes (Conditional Presence):
+
+ max_command_size
+ - Read-only
+ - Present if the feature supports out-of-band MCTP access.
+ - Maximum supported MCTP command size for out-of-band PMT access (bytes).
+
+ max_stream_size
+ - Read-only
+ - Present if the feature supports out-of-band MCTP access.
+ - Maximum supported MCTP stream size (bytes).
+
+ min_watcher_period_ms
+ - Read-only
+ - Present if the feature supports the watcher API.
+ The watcher API provides a writable control interface that allows user
+ configuration of monitoring behavior, such as setting the sampling or
+ reporting interval.
+ - Minimum supported time period for the watcher interface (milliseconds).
+
+ num_rmids
+ - Read-only
+ - Present if the feature supports RMID (Resource Monitoring ID) telemetry.
+ RMIDs are identifiers used by hardware to track and report resource usage,
+ such as memory bandwidth or energy consumption, on a per-logical-entity
+ basis (e.g., per core, thread, or process group).
+ - Maximum number of RMIDs tracked simultaneously.
+
+Example:
+For a device with PCI BDF `0000:00:03.1`, the directory tree could look like:
+
+ /sys/class/intel_pmt/features-0000:00:03.1/
+ ├── accelerator_telemetry/
+ │ ├── caps
+ │ ├── guids
+ │ ├── max_command_size
+ │ ├── max_stream_size
+ │ ├── min_watcher_period_ms
+ ├── crash_log/
+ │ ├── caps
+ │ ├── guids
+ │ ├── max_command_size
+ │ ├── max_stream_size
+ ├── per_core_environment_telemetry/
+ │ ├── caps
+ │ ├── guids
+ │ ├── max_command_size
+ │ ├── max_stream_size
+ │ ├── min_watcher_period_ms
+ ├── per_rmid_energy_telemetry/
+ │ ├── caps
+ │ ├── guids
+ │ ├── max_command_size
+ │ ├── max_stream_size
+ │ ├── min_watcher_period_ms
+ │ ├── num_rmids
+ ├── tpmi_control/
+ │ ├── caps
+ │ ├── guids
+ ├── tracing/
+ │ ├── caps
+ │ ├── guids
+ ├── uncore_telemetry/
+ │ ├── caps
+ │ ├── guids
+ │ ├── max_command_size
+ │ ├── max_stream_size
+ │ ├── min_watcher_period_ms
+
+Notes:
+ - Some attributes are only present if the corresponding feature supports
+ the capability (e.g., `max_command_size` for MCTP-capable features).
+ - Features supporting RMIDs include `num_rmids`.
+ - Features supporting the watcher API include `min_watcher_period_ms`.
+ - The `caps` file provides additional information about the functionality
+ of the feature.
+
+Example 'caps' content for the 'tracing' feature:
+
+ /sys/class/intel_pmt/features-0000:00:03.1/
+ ├── tracing/
+ │ ├── caps
+
+ telemetry Available: No
+ watcher Available: Yes
+ crashlog Available: No
+ streaming Available: No
+ threashold Available: No
+ window Available: No
+ config Available: Yes
+ tracing Available: No
+ inband Available: Yes
+ oob Available: Yes
+ secure_chan Available: No
+ pmt_sp Available: Yes
+ pmt_sp_policy Available: Yes
+ mailbox Available: Yes
+ bios_lock Available: Yes
diff --git a/Documentation/ABI/testing/sysfs-class-led b/Documentation/ABI/testing/sysfs-class-led
index 2e24ac3bd7ef..0313b82644f2 100644
--- a/Documentation/ABI/testing/sysfs-class-led
+++ b/Documentation/ABI/testing/sysfs-class-led
@@ -72,6 +72,12 @@ Description:
/sys/class/leds/<led> once a given trigger is selected. For
their documentation see `sysfs-class-led-trigger-*`.
+ Writing "none" removes the trigger for this LED.
+
+ Writing "default" sets the trigger to the LED's default trigger
+ (which would often be configured in the device tree for the
+ hardware).
+
What: /sys/class/leds/<led>/inverted
Date: January 2011
KernelVersion: 2.6.38
diff --git a/Documentation/ABI/testing/sysfs-class-led-trigger-netdev b/Documentation/ABI/testing/sysfs-class-led-trigger-netdev
index a6c307c4befa..ed46b37ab8a2 100644
--- a/Documentation/ABI/testing/sysfs-class-led-trigger-netdev
+++ b/Documentation/ABI/testing/sysfs-class-led-trigger-netdev
@@ -88,6 +88,8 @@ Description:
speed of 10MBps of the named network device.
Setting this value also immediately changes the LED state.
+ Present only if the named network device supports 10Mbps link speed.
+
What: /sys/class/leds/<led>/link_100
Date: Jun 2023
KernelVersion: 6.5
@@ -101,6 +103,8 @@ Description:
speed of 100Mbps of the named network device.
Setting this value also immediately changes the LED state.
+ Present only if the named network device supports 100Mbps link speed.
+
What: /sys/class/leds/<led>/link_1000
Date: Jun 2023
KernelVersion: 6.5
@@ -114,6 +118,8 @@ Description:
speed of 1000Mbps of the named network device.
Setting this value also immediately changes the LED state.
+ Present only if the named network device supports 1000Mbps link speed.
+
What: /sys/class/leds/<led>/link_2500
Date: Nov 2023
KernelVersion: 6.8
@@ -127,6 +133,8 @@ Description:
speed of 2500Mbps of the named network device.
Setting this value also immediately changes the LED state.
+ Present only if the named network device supports 2500Mbps link speed.
+
What: /sys/class/leds/<led>/link_5000
Date: Nov 2023
KernelVersion: 6.8
@@ -140,6 +148,8 @@ Description:
speed of 5000Mbps of the named network device.
Setting this value also immediately changes the LED state.
+ Present only if the named network device supports 5000Mbps link speed.
+
What: /sys/class/leds/<led>/link_10000
Date: Nov 2023
KernelVersion: 6.8
@@ -153,6 +163,8 @@ Description:
speed of 10000Mbps of the named network device.
Setting this value also immediately changes the LED state.
+ Present only if the named network device supports 10000Mbps link speed.
+
What: /sys/class/leds/<led>/half_duplex
Date: Jun 2023
KernelVersion: 6.5
diff --git a/Documentation/ABI/testing/sysfs-class-led-trigger-pattern b/Documentation/ABI/testing/sysfs-class-led-trigger-pattern
index 8c57d2780554..22f28f2e9ac4 100644
--- a/Documentation/ABI/testing/sysfs-class-led-trigger-pattern
+++ b/Documentation/ABI/testing/sysfs-class-led-trigger-pattern
@@ -12,6 +12,16 @@ Description:
The exact format is described in:
Documentation/devicetree/bindings/leds/leds-trigger-pattern.txt
+What: /sys/class/leds/<led>/hr_pattern
+Date: April 2024
+Description:
+ Specify a software pattern for the LED, that supports altering
+ the brightness for the specified duration with one software
+ timer. It can do gradual dimming and step change of brightness.
+
+ Unlike the /sys/class/leds/<led>/pattern, this attribute runs
+ a pattern on high-resolution timer (hrtimer).
+
What: /sys/class/leds/<led>/hw_pattern
Date: September 2018
KernelVersion: 4.20
diff --git a/Documentation/ABI/testing/sysfs-class-led-trigger-tty b/Documentation/ABI/testing/sysfs-class-led-trigger-tty
index 30cef9ac0f49..308fbc3627cd 100644
--- a/Documentation/ABI/testing/sysfs-class-led-trigger-tty
+++ b/Documentation/ABI/testing/sysfs-class-led-trigger-tty
@@ -1,11 +1,11 @@
-What: /sys/class/leds/<led>/ttyname
+What: /sys/class/leds/<tty_led>/ttyname
Date: Dec 2020
KernelVersion: 5.10
Contact: linux-leds@vger.kernel.org
Description:
Specifies the tty device name of the triggering tty
-What: /sys/class/leds/<led>/rx
+What: /sys/class/leds/<tty_led>/rx
Date: February 2024
KernelVersion: 6.8
Description:
@@ -13,7 +13,7 @@ Description:
If set to 0, the LED will not blink on reception.
If set to 1 (default), the LED will blink on reception.
-What: /sys/class/leds/<led>/tx
+What: /sys/class/leds/<tty_led>/tx
Date: February 2024
KernelVersion: 6.8
Description:
@@ -21,7 +21,7 @@ Description:
If set to 0, the LED will not blink on transmission.
If set to 1 (default), the LED will blink on transmission.
-What: /sys/class/leds/<led>/cts
+What: /sys/class/leds/<tty_led>/cts
Date: February 2024
KernelVersion: 6.8
Description:
@@ -31,7 +31,7 @@ Description:
If set to 0 (default), the LED will not evaluate CTS.
If set to 1, the LED will evaluate CTS.
-What: /sys/class/leds/<led>/dsr
+What: /sys/class/leds/<tty_led>/dsr
Date: February 2024
KernelVersion: 6.8
Description:
@@ -41,7 +41,7 @@ Description:
If set to 0 (default), the LED will not evaluate DSR.
If set to 1, the LED will evaluate DSR.
-What: /sys/class/leds/<led>/dcd
+What: /sys/class/leds/<tty_led>/dcd
Date: February 2024
KernelVersion: 6.8
Description:
@@ -51,7 +51,7 @@ Description:
If set to 0 (default), the LED will not evaluate CAR (DCD).
If set to 1, the LED will evaluate CAR (DCD).
-What: /sys/class/leds/<led>/rng
+What: /sys/class/leds/<tty_led>/rng
Date: February 2024
KernelVersion: 6.8
Description:
diff --git a/Documentation/ABI/testing/sysfs-class-net-phydev b/Documentation/ABI/testing/sysfs-class-net-phydev
index ac722dd5e694..31615c59bff9 100644
--- a/Documentation/ABI/testing/sysfs-class-net-phydev
+++ b/Documentation/ABI/testing/sysfs-class-net-phydev
@@ -26,6 +26,16 @@ Description:
This ID is used to match the device with the appropriate
driver.
+What: /sys/class/mdio_bus/<bus>/<device>/c45_phy_ids/mmd<n>_device_id
+Date: June 2025
+KernelVersion: 6.17
+Contact: netdev@vger.kernel.org
+Description:
+ This attribute contains the 32-bit PHY Identifier as reported
+ by the device during bus enumeration, encoded in hexadecimal.
+ These C45 IDs are used to match the device with the appropriate
+ driver. These files are invisible to the C22 device.
+
What: /sys/class/mdio_bus/<bus>/<device>/phy_interface
Date: February 2014
KernelVersion: 3.15
diff --git a/Documentation/ABI/testing/sysfs-class-net-queues b/Documentation/ABI/testing/sysfs-class-net-queues
index 906ff3ca928a..84aa25e0d14d 100644
--- a/Documentation/ABI/testing/sysfs-class-net-queues
+++ b/Documentation/ABI/testing/sysfs-class-net-queues
@@ -1,4 +1,4 @@
-What: /sys/class/<iface>/queues/rx-<queue>/rps_cpus
+What: /sys/class/net/<iface>/queues/rx-<queue>/rps_cpus
Date: March 2010
KernelVersion: 2.6.35
Contact: netdev@vger.kernel.org
@@ -8,7 +8,7 @@ Description:
network device queue. Possible values depend on the number
of available CPU(s) in the system.
-What: /sys/class/<iface>/queues/rx-<queue>/rps_flow_cnt
+What: /sys/class/net/<iface>/queues/rx-<queue>/rps_flow_cnt
Date: April 2010
KernelVersion: 2.6.35
Contact: netdev@vger.kernel.org
@@ -16,7 +16,7 @@ Description:
Number of Receive Packet Steering flows being currently
processed by this particular network device receive queue.
-What: /sys/class/<iface>/queues/tx-<queue>/tx_timeout
+What: /sys/class/net/<iface>/queues/tx-<queue>/tx_timeout
Date: November 2011
KernelVersion: 3.3
Contact: netdev@vger.kernel.org
@@ -24,7 +24,7 @@ Description:
Indicates the number of transmit timeout events seen by this
network interface transmit queue.
-What: /sys/class/<iface>/queues/tx-<queue>/tx_maxrate
+What: /sys/class/net/<iface>/queues/tx-<queue>/tx_maxrate
Date: March 2015
KernelVersion: 4.1
Contact: netdev@vger.kernel.org
@@ -32,7 +32,7 @@ Description:
A Mbps max-rate set for the queue, a value of zero means disabled,
default is disabled.
-What: /sys/class/<iface>/queues/tx-<queue>/xps_cpus
+What: /sys/class/net/<iface>/queues/tx-<queue>/xps_cpus
Date: November 2010
KernelVersion: 2.6.38
Contact: netdev@vger.kernel.org
@@ -42,7 +42,7 @@ Description:
network device transmit queue. Possible values depend on the
number of available CPU(s) in the system.
-What: /sys/class/<iface>/queues/tx-<queue>/xps_rxqs
+What: /sys/class/net/<iface>/queues/tx-<queue>/xps_rxqs
Date: June 2018
KernelVersion: 4.18.0
Contact: netdev@vger.kernel.org
@@ -53,7 +53,7 @@ Description:
number of available receive queue(s) in the network device.
Default is disabled.
-What: /sys/class/<iface>/queues/tx-<queue>/byte_queue_limits/hold_time
+What: /sys/class/net/<iface>/queues/tx-<queue>/byte_queue_limits/hold_time
Date: November 2011
KernelVersion: 3.3
Contact: netdev@vger.kernel.org
@@ -62,7 +62,7 @@ Description:
of this particular network device transmit queue.
Default value is 1000.
-What: /sys/class/<iface>/queues/tx-<queue>/byte_queue_limits/inflight
+What: /sys/class/net/<iface>/queues/tx-<queue>/byte_queue_limits/inflight
Date: November 2011
KernelVersion: 3.3
Contact: netdev@vger.kernel.org
@@ -70,7 +70,7 @@ Description:
Indicates the number of bytes (objects) in flight on this
network device transmit queue.
-What: /sys/class/<iface>/queues/tx-<queue>/byte_queue_limits/limit
+What: /sys/class/net/<iface>/queues/tx-<queue>/byte_queue_limits/limit
Date: November 2011
KernelVersion: 3.3
Contact: netdev@vger.kernel.org
@@ -79,7 +79,7 @@ Description:
on this network device transmit queue. This value is clamped
to be within the bounds defined by limit_max and limit_min.
-What: /sys/class/<iface>/queues/tx-<queue>/byte_queue_limits/limit_max
+What: /sys/class/net/<iface>/queues/tx-<queue>/byte_queue_limits/limit_max
Date: November 2011
KernelVersion: 3.3
Contact: netdev@vger.kernel.org
@@ -88,7 +88,7 @@ Description:
queued on this network device transmit queue. See
include/linux/dynamic_queue_limits.h for the default value.
-What: /sys/class/<iface>/queues/tx-<queue>/byte_queue_limits/limit_min
+What: /sys/class/net/<iface>/queues/tx-<queue>/byte_queue_limits/limit_min
Date: November 2011
KernelVersion: 3.3
Contact: netdev@vger.kernel.org
@@ -96,3 +96,26 @@ Description:
Indicates the absolute minimum limit of bytes allowed to be
queued on this network device transmit queue. Default value is
0.
+
+What: /sys/class/net/<iface>/queues/tx-<queue>/byte_queue_limits/stall_thrs
+Date: Jan 2024
+KernelVersion: 6.9
+Contact: netdev@vger.kernel.org
+Description:
+ Tx completion stall detection threshold in ms. Kernel will
+ guarantee to detect all stalls longer than this threshold but
+ may also detect stalls longer than half of the threshold.
+
+What: /sys/class/net/<iface>/queues/tx-<queue>/byte_queue_limits/stall_cnt
+Date: Jan 2024
+KernelVersion: 6.9
+Contact: netdev@vger.kernel.org
+Description:
+ Number of detected Tx completion stalls.
+
+What: /sys/class/net/<iface>/queues/tx-<queue>/byte_queue_limits/stall_max
+Date: Jan 2024
+KernelVersion: 6.9
+Contact: netdev@vger.kernel.org
+Description:
+ Longest detected Tx completion stall. Write 0 to clear.
diff --git a/Documentation/ABI/testing/sysfs-class-net-statistics b/Documentation/ABI/testing/sysfs-class-net-statistics
index 55db27815361..53e508c6936a 100644
--- a/Documentation/ABI/testing/sysfs-class-net-statistics
+++ b/Documentation/ABI/testing/sysfs-class-net-statistics
@@ -1,4 +1,4 @@
-What: /sys/class/<iface>/statistics/collisions
+What: /sys/class/net/<iface>/statistics/collisions
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
@@ -6,7 +6,7 @@ Description:
Indicates the number of collisions seen by this network device.
This value might not be relevant with all MAC layers.
-What: /sys/class/<iface>/statistics/multicast
+What: /sys/class/net/<iface>/statistics/multicast
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
@@ -14,7 +14,7 @@ Description:
Indicates the number of multicast packets received by this
network device.
-What: /sys/class/<iface>/statistics/rx_bytes
+What: /sys/class/net/<iface>/statistics/rx_bytes
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
@@ -23,7 +23,7 @@ Description:
See the network driver for the exact meaning of when this
value is incremented.
-What: /sys/class/<iface>/statistics/rx_compressed
+What: /sys/class/net/<iface>/statistics/rx_compressed
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
@@ -32,7 +32,7 @@ Description:
network device. This value might only be relevant for interfaces
that support packet compression (e.g: PPP).
-What: /sys/class/<iface>/statistics/rx_crc_errors
+What: /sys/class/net/<iface>/statistics/rx_crc_errors
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
@@ -41,7 +41,7 @@ Description:
by this network device. Note that the specific meaning might
depend on the MAC layer used by the interface.
-What: /sys/class/<iface>/statistics/rx_dropped
+What: /sys/class/net/<iface>/statistics/rx_dropped
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
@@ -51,7 +51,7 @@ Description:
packet processing. See the network driver for the exact
meaning of this value.
-What: /sys/class/<iface>/statistics/rx_errors
+What: /sys/class/net/<iface>/statistics/rx_errors
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
@@ -59,7 +59,7 @@ Description:
Indicates the number of receive errors on this network device.
See the network driver for the exact meaning of this value.
-What: /sys/class/<iface>/statistics/rx_fifo_errors
+What: /sys/class/net/<iface>/statistics/rx_fifo_errors
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
@@ -68,7 +68,7 @@ Description:
network device. See the network driver for the exact
meaning of this value.
-What: /sys/class/<iface>/statistics/rx_frame_errors
+What: /sys/class/net/<iface>/statistics/rx_frame_errors
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
@@ -78,7 +78,7 @@ Description:
on the MAC layer protocol used. See the network driver for
the exact meaning of this value.
-What: /sys/class/<iface>/statistics/rx_length_errors
+What: /sys/class/net/<iface>/statistics/rx_length_errors
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
@@ -87,7 +87,7 @@ Description:
error, oversized or undersized. See the network driver for the
exact meaning of this value.
-What: /sys/class/<iface>/statistics/rx_missed_errors
+What: /sys/class/net/<iface>/statistics/rx_missed_errors
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
@@ -96,7 +96,7 @@ Description:
due to lack of capacity in the receive side. See the network
driver for the exact meaning of this value.
-What: /sys/class/<iface>/statistics/rx_nohandler
+What: /sys/class/net/<iface>/statistics/rx_nohandler
Date: February 2016
KernelVersion: 4.6
Contact: netdev@vger.kernel.org
@@ -104,7 +104,7 @@ Description:
Indicates the number of received packets that were dropped on
an inactive device by the network core.
-What: /sys/class/<iface>/statistics/rx_over_errors
+What: /sys/class/net/<iface>/statistics/rx_over_errors
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
@@ -114,7 +114,7 @@ Description:
(e.g: larger than MTU). See the network driver for the exact
meaning of this value.
-What: /sys/class/<iface>/statistics/rx_packets
+What: /sys/class/net/<iface>/statistics/rx_packets
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
@@ -122,7 +122,7 @@ Description:
Indicates the total number of good packets received by this
network device.
-What: /sys/class/<iface>/statistics/tx_aborted_errors
+What: /sys/class/net/<iface>/statistics/tx_aborted_errors
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
@@ -132,7 +132,7 @@ Description:
a medium collision). See the network driver for the exact
meaning of this value.
-What: /sys/class/<iface>/statistics/tx_bytes
+What: /sys/class/net/<iface>/statistics/tx_bytes
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
@@ -143,7 +143,7 @@ Description:
transmitted packets or all packets that have been queued for
transmission.
-What: /sys/class/<iface>/statistics/tx_carrier_errors
+What: /sys/class/net/<iface>/statistics/tx_carrier_errors
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
@@ -152,7 +152,7 @@ Description:
because of carrier errors (e.g: physical link down). See the
network driver for the exact meaning of this value.
-What: /sys/class/<iface>/statistics/tx_compressed
+What: /sys/class/net/<iface>/statistics/tx_compressed
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
@@ -161,7 +161,7 @@ Description:
this might only be relevant for devices that support
compression (e.g: PPP).
-What: /sys/class/<iface>/statistics/tx_dropped
+What: /sys/class/net/<iface>/statistics/tx_dropped
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
@@ -170,7 +170,7 @@ Description:
See the driver for the exact reasons as to why the packets were
dropped.
-What: /sys/class/<iface>/statistics/tx_errors
+What: /sys/class/net/<iface>/statistics/tx_errors
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
@@ -179,7 +179,7 @@ Description:
a network device. See the driver for the exact reasons as to
why the packets were dropped.
-What: /sys/class/<iface>/statistics/tx_fifo_errors
+What: /sys/class/net/<iface>/statistics/tx_fifo_errors
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
@@ -188,7 +188,7 @@ Description:
FIFO error. See the driver for the exact reasons as to why the
packets were dropped.
-What: /sys/class/<iface>/statistics/tx_heartbeat_errors
+What: /sys/class/net/<iface>/statistics/tx_heartbeat_errors
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
@@ -197,7 +197,7 @@ Description:
reported as heartbeat errors. See the driver for the exact
reasons as to why the packets were dropped.
-What: /sys/class/<iface>/statistics/tx_packets
+What: /sys/class/net/<iface>/statistics/tx_packets
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
@@ -206,7 +206,7 @@ Description:
device. See the driver for whether this reports the number of all
attempted or successful transmissions.
-What: /sys/class/<iface>/statistics/tx_window_errors
+What: /sys/class/net/<iface>/statistics/tx_window_errors
Date: April 2005
KernelVersion: 2.6.12
Contact: netdev@vger.kernel.org
diff --git a/Documentation/ABI/testing/sysfs-class-pktcdvd b/Documentation/ABI/testing/sysfs-class-pktcdvd
deleted file mode 100644
index ba1ce626591d..000000000000
--- a/Documentation/ABI/testing/sysfs-class-pktcdvd
+++ /dev/null
@@ -1,97 +0,0 @@
-sysfs interface
----------------
-The pktcdvd module (packet writing driver) creates the following files in the
-sysfs: (<devid> is in the format major:minor)
-
-What: /sys/class/pktcdvd/add
-What: /sys/class/pktcdvd/remove
-What: /sys/class/pktcdvd/device_map
-Date: Oct. 2006
-KernelVersion: 2.6.20
-Contact: Thomas Maier <balagi@justmail.de>
-Description:
-
- ========== ==============================================
- add (WO) Write a block device id (major:minor) to
- create a new pktcdvd device and map it to the
- block device.
-
- remove (WO) Write the pktcdvd device id (major:minor)
- to remove the pktcdvd device.
-
- device_map (RO) Shows the device mapping in format:
- pktcdvd[0-7] <pktdevid> <blkdevid>
- ========== ==============================================
-
-
-What: /sys/class/pktcdvd/pktcdvd[0-7]/dev
-What: /sys/class/pktcdvd/pktcdvd[0-7]/uevent
-Date: Oct. 2006
-KernelVersion: 2.6.20
-Contact: Thomas Maier <balagi@justmail.de>
-Description:
- dev: (RO) Device id
-
- uevent: (WO) To send a uevent
-
-
-What: /sys/class/pktcdvd/pktcdvd[0-7]/stat/packets_started
-What: /sys/class/pktcdvd/pktcdvd[0-7]/stat/packets_finished
-What: /sys/class/pktcdvd/pktcdvd[0-7]/stat/kb_written
-What: /sys/class/pktcdvd/pktcdvd[0-7]/stat/kb_read
-What: /sys/class/pktcdvd/pktcdvd[0-7]/stat/kb_read_gather
-What: /sys/class/pktcdvd/pktcdvd[0-7]/stat/reset
-Date: Oct. 2006
-KernelVersion: 2.6.20
-Contact: Thomas Maier <balagi@justmail.de>
-Description:
- packets_started: (RO) Number of started packets.
-
- packets_finished: (RO) Number of finished packets.
-
- kb_written: (RO) kBytes written.
-
- kb_read: (RO) kBytes read.
-
- kb_read_gather: (RO) kBytes read to fill write packets.
-
- reset: (WO) Write any value to it to reset
- pktcdvd device statistic values, like
- bytes read/written.
-
-
-What: /sys/class/pktcdvd/pktcdvd[0-7]/write_queue/size
-What: /sys/class/pktcdvd/pktcdvd[0-7]/write_queue/congestion_off
-What: /sys/class/pktcdvd/pktcdvd[0-7]/write_queue/congestion_on
-Date: Oct. 2006
-KernelVersion: 2.6.20
-Contact: Thomas Maier <balagi@justmail.de>
-Description:
- ============== ================================================
- size (RO) Contains the size of the bio write queue.
-
- congestion_off (RW) If bio write queue size is below this mark,
- accept new bio requests from the block layer.
-
- congestion_on (RW) If bio write queue size is higher as this
- mark, do no longer accept bio write requests
- from the block layer and wait till the pktcdvd
- device has processed enough bio's so that bio
- write queue size is below congestion off mark.
- A value of <= 0 disables congestion control.
- ============== ================================================
-
-
-Example:
---------
-To use the pktcdvd sysfs interface directly, you can do::
-
- # create a new pktcdvd device mapped to /dev/hdc
- echo "22:0" >/sys/class/pktcdvd/add
- cat /sys/class/pktcdvd/device_map
- # assuming device pktcdvd0 was created, look at stat's
- cat /sys/class/pktcdvd/pktcdvd0/stat/kb_written
- # print the device id of the mapped block device
- fgrep pktcdvd0 /sys/class/pktcdvd/device_map
- # remove device, using pktcdvd0 device id 253:0
- echo "253:0" >/sys/class/pktcdvd/remove
diff --git a/Documentation/ABI/testing/sysfs-class-platform-profile b/Documentation/ABI/testing/sysfs-class-platform-profile
new file mode 100644
index 000000000000..fcab26894ec3
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-platform-profile
@@ -0,0 +1,50 @@
+What: /sys/class/platform-profile/platform-profile-X/name
+Date: March 2025
+KernelVersion: 6.14
+Description: Name of the class device given by the driver.
+
+ RO
+
+What: /sys/class/platform-profile/platform-profile-X/choices
+Date: March 2025
+KernelVersion: 6.14
+Description: This file contains a space-separated list of profiles supported
+ for this device.
+
+ Drivers must use the following standard profile-names:
+
+ ==================== ========================================
+ low-power Low power consumption
+ cool Cooler operation
+ quiet Quieter operation
+ balanced Balance between low power consumption
+ and performance
+ balanced-performance Balance between performance and low
+ power consumption with a slight bias
+ towards performance
+ performance High performance operation
+ max-power Higher performance operation that may exceed
+ internal battery draw limits when on AC power
+ custom Driver defined custom profile
+ ==================== ========================================
+
+ RO
+
+What: /sys/class/platform-profile/platform-profile-X/profile
+Date: March 2025
+KernelVersion: 6.14
+Description: Reading this file gives the current selected profile for this
+ device. Writing this file with one of the strings from
+ platform_profile_choices changes the profile to the new value.
+
+ This file can be monitored for changes by polling for POLLPRI,
+ POLLPRI will be signaled on any changes, independent of those
+ changes coming from a userspace write; or coming from another
+ source such as e.g. a hotkey triggered profile change handled
+ either directly by the embedded-controller or fully handled
+ inside the kernel.
+
+ This file may also emit the string 'custom' to indicate
+ that the driver is using a driver defined custom profile.
+
+ RW
diff --git a/Documentation/ABI/testing/sysfs-class-power b/Documentation/ABI/testing/sysfs-class-power
index 7c81f0a25a48..4b21d5d23251 100644
--- a/Documentation/ABI/testing/sysfs-class-power
+++ b/Documentation/ABI/testing/sysfs-class-power
@@ -377,24 +377,60 @@ What: /sys/class/power_supply/<supply_name>/charge_type
Date: July 2009
Contact: linux-pm@vger.kernel.org
Description:
- Represents the type of charging currently being applied to the
- battery. "Trickle", "Fast", and "Standard" all mean different
- charging speeds. "Adaptive" means that the charger uses some
- algorithm to adjust the charge rate dynamically, without
- any user configuration required. "Custom" means that the charger
- uses the charge_control_* properties as configuration for some
- different algorithm. "Long Life" means the charger reduces its
- charging rate in order to prolong the battery health. "Bypass"
- means the charger bypasses the charging path around the
- integrated converter allowing for a "smart" wall adaptor to
- perform the power conversion externally.
+ Select the charging algorithm to use for a battery.
+
+ Standard:
+ Fully charge the battery at a moderate rate.
+ Fast:
+ Quickly charge the battery using fast-charge
+ technology. This is typically harder on the battery
+ than standard charging and may lower its lifespan.
+ Trickle:
+ Users who primarily operate the system while
+ plugged into an external power source can extend
+ battery life with this mode. Vendor tooling may
+ call this "Primarily AC Use".
+ Adaptive:
+ Automatically optimize battery charge rate based
+ on typical usage pattern.
+ Custom:
+ Use the charge_control_* properties to determine
+ when to start and stop charging. Advanced users
+ can use this to drastically extend battery life.
+ Long Life:
+ The charger reduces its charging rate in order to
+ prolong the battery health.
+ Bypass:
+ The charger bypasses the charging path around the
+ integrated converter allowing for a "smart" wall
+ adaptor to perform the power conversion externally.
Access: Read, Write
+ Reading this returns the current active value, e.g. 'Standard'.
+ Check charge_types to get the values supported by the battery.
+
Valid values:
"Unknown", "N/A", "Trickle", "Fast", "Standard",
"Adaptive", "Custom", "Long Life", "Bypass"
+What: /sys/class/power_supply/<supply_name>/charge_types
+Date: December 2024
+Contact: linux-pm@vger.kernel.org
+Description:
+ Identical to charge_type but reading returns a list of supported
+ charge-types with the currently active type surrounded by square
+ brackets, e.g.: "Fast [Standard] Long_Life".
+
+ power_supply class devices may support both charge_type and
+ charge_types for backward compatibility. In this case both will
+ always have the same active value and the active value can be
+ changed by writing either property.
+
+ Note charge-types which contain a space such as "Long Life" will
+ have the space replaced by a '_' resulting in e.g. "Long_Life".
+ When writing charge-types both variants are accepted.
+
What: /sys/class/power_supply/<supply_name>/charge_term_current
Date: July 2014
Contact: linux-pm@vger.kernel.org
@@ -417,10 +453,10 @@ Description:
Valid values:
"Unknown", "Good", "Overheat", "Dead",
- "Over voltage", "Unspecified failure", "Cold",
+ "Over voltage", "Under voltage", "Unspecified failure", "Cold",
"Watchdog timer expire", "Safety timer expire",
"Over current", "Calibration required", "Warm",
- "Cool", "Hot", "No battery"
+ "Cool", "Hot", "No battery", "Blown fuse", "Cell imbalance"
What: /sys/class/power_supply/<supply_name>/precharge_current
Date: June 2017
@@ -472,11 +508,12 @@ Description:
Access: Read, Write
Valid values:
- ================ ====================================
- auto: Charge normally, respect thresholds
- inhibit-charge: Do not charge while AC is attached
- force-discharge: Force discharge while AC is attached
- ================ ====================================
+ ===================== ========================================
+ auto: Charge normally, respect thresholds
+ inhibit-charge: Do not charge while AC is attached
+ inhibit-charge-awake: inhibit-charge only when device is awake
+ force-discharge: Force discharge while AC is attached
+ ===================== ========================================
What: /sys/class/power_supply/<supply_name>/technology
Date: May 2007
@@ -516,6 +553,43 @@ Description:
Integer > 0: representing full cycles
Integer = 0: cycle_count info is not available
+What: /sys/class/power_supply/<supply_name>/internal_resistance
+Date: August 2025
+Contact: linux-arm-msm@vger.kernel.org
+Description:
+ Represent the battery's internal resistance, often referred
+ to as Equivalent Series Resistance (ESR). It is a dynamic
+ parameter that reflects the opposition to current flow within
+ the cell. It is not a fixed value but varies significantly
+ based on several operational conditions, including battery
+ state of charge (SoC), temperature, and whether the battery
+ is in a charging or discharging state.
+
+ Access: Read
+
+ Valid values: Represented in microohms
+
+What: /sys/class/power_supply/<supply_name>/state_of_health
+Date: August 2025
+Contact: linux-arm-msm@vger.kernel.org
+Description:
+ The state_of_health parameter quantifies the overall condition
+ of a battery as a percentage, reflecting its ability to deliver
+ rated performance relative to its original specifications. It is
+ dynamically computed using a combination of learned capacity
+ and impedance-based degradation indicators, both of which evolve
+ over the battery's lifecycle.
+ Note that the exact algorithms are kept secret by most battery
+ vendors and the value from different battery vendors cannot be
+ compared with each other as there is no vendor-agnostic definition
+ of "performance". Also this usually cannot be used for any
+ calculations (i.e. this is not the factor between charge_full and
+ charge_full_design).
+
+ Access: Read
+
+ Valid values: 0 - 100 (percent)
+
**USB Properties**
What: /sys/class/power_supply/<supply_name>/input_current_limit
@@ -592,7 +666,12 @@ Description:
the supply, for example it can show if USB-PD capable source
is attached.
- Access: Read-Only
+ Access: For power-supplies which consume USB power such
+ as battery charger chips, this indicates the type of
+ the connected USB power source and is Read-Only.
+
+ For power-supplies which act as a USB power-source such as
+ e.g. the UCS1002 USB Port Power Controller this is writable.
Valid values:
"Unknown", "SDP", "DCP", "CDP", "ACA", "C", "PD",
@@ -772,3 +851,55 @@ Description:
Access: Read
Valid values: 1-31
+
+What: /sys/class/power_supply/<supply_name>/extensions/<extension_name>
+Date: March 2025
+Contact: linux-pm@vger.kernel.org
+Description:
+ Reports the extensions registered to the power supply.
+ Each entry is a link to the device which registered the extension.
+
+ Access: Read
+
+What: /sys/class/power_supply/max8971-charger/fast_charge_timer
+Date: May 2025
+KernelVersion: 6.15.0
+Contact: Svyatoslav Ryhel <clamor95@gmail.com>
+Description:
+ This entry shows and sets the maximum time the max8971
+ charger operates in fast-charge mode. When the timer expires
+ the device will terminate fast-charge mode (charging current
+ will drop to 0 A) and will trigger interrupt.
+
+ Valid values:
+
+ - 4 - 10 (hours), step by 1
+ - 0: disabled.
+
+What: /sys/class/power_supply/max8971-charger/top_off_threshold_current
+Date: May 2025
+KernelVersion: 6.15.0
+Contact: Svyatoslav Ryhel <clamor95@gmail.com>
+Description:
+ This entry shows and sets the charging current threshold for
+ entering top-off charging mode. When charging current in fast
+ charge mode drops below this value, the charger will trigger
+ interrupt and start top-off charging mode.
+
+ Valid values:
+
+ - 50000 - 200000 (microamps), step by 50000 (rounded down)
+
+What: /sys/class/power_supply/max8971-charger/top_off_timer
+Date: May 2025
+KernelVersion: 6.15.0
+Contact: Svyatoslav Ryhel <clamor95@gmail.com>
+Description:
+ This entry shows and sets the maximum time the max8971
+ charger operates in top-off charge mode. When the timer expires
+ the device will terminate top-off charge mode (charging current
+ will drop to 0 A) and will trigger interrupt.
+
+ Valid values:
+
+ - 0 - 70 (minutes), step by 10 (rounded down)
diff --git a/Documentation/ABI/testing/sysfs-class-power-gaokun b/Documentation/ABI/testing/sysfs-class-power-gaokun
new file mode 100644
index 000000000000..0633aed7b355
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-power-gaokun
@@ -0,0 +1,27 @@
+What: /sys/class/power_supply/gaokun-ec-battery/smart_charge_delay
+Date: March 2025
+KernelVersion: 6.15
+Contact: Pengyu Luo <mitltlatltl@gmail.com>
+Description:
+ This entry allows configuration of smart charging delay.
+
+ Smart charging behavior: when the power adapter is connected
+ for delay hours, battery charging will follow the rules of
+ charge_control_start_threshold and charge_control_end_threshold.
+ For more information about charge control, please refer to
+ sysfs-class-power.
+
+ Access: Read, Write
+
+ Valid values: In hours (non-negative)
+
+What: /sys/class/power_supply/gaokun-ec-battery/battery_adaptive_charge
+Date: March 2025
+KernelVersion: 6.15
+Contact: Pengyu Luo <mitltlatltl@gmail.com>
+Description:
+ This entry allows enabling battery adaptive charging.
+
+ Access: Read, Write
+
+ Valid values: 0 (disabled) or 1 (enabled)
diff --git a/Documentation/ABI/testing/sysfs-class-power-max1720x b/Documentation/ABI/testing/sysfs-class-power-max1720x
new file mode 100644
index 000000000000..7d895bfda9ce
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-power-max1720x
@@ -0,0 +1,32 @@
+What: /sys/class/power_supply/max1720x/temp_ain1
+Date: January 2025
+KernelVersion: 6.14
+Contact: Dimitri Fedrau <dimitri.fedrau@liebherr.com>
+Description:
+ Reports the current temperature reading from AIN1 thermistor.
+
+ Access: Read
+
+ Valid values: Represented in 1/10 Degrees Celsius
+
+What: /sys/class/power_supply/max1720x/temp_ain2
+Date: January 2025
+KernelVersion: 6.14
+Contact: Dimitri Fedrau <dimitri.fedrau@liebherr.com>
+Description:
+ Reports the current temperature reading from AIN2 thermistor.
+
+ Access: Read
+
+ Valid values: Represented in 1/10 Degrees Celsius
+
+What: /sys/class/power_supply/max1720x/temp_int
+Date: January 2025
+KernelVersion: 6.14
+Contact: Dimitri Fedrau <dimitri.fedrau@liebherr.com>
+Description:
+ Reports the current temperature reading from internal die.
+
+ Access: Read
+
+ Valid values: Represented in 1/10 Degrees Celsius
diff --git a/Documentation/ABI/testing/sysfs-class-power-rt9756 b/Documentation/ABI/testing/sysfs-class-power-rt9756
new file mode 100644
index 000000000000..c4d6c2b4715d
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-power-rt9756
@@ -0,0 +1,30 @@
+What: /sys/class/power_supply/rt9756-*/watchdog_timer
+Date: Dec 2025
+KernelVersion: 6.19
+Contact: ChiYuan Huang <cy_huang@richtek.com>
+Description:
+ This entry shows and sets the watchdog timer when rt9756 charger
+ operates in charging mode. When the timer expires, the device
+ will disable the charging. To prevent the timer expires, any
+ host communication can make the timer restarted.
+
+ Access: Read, Write
+
+ Valid values:
+ - 500, 1000, 5000, 30000, 40000, 80000, 128000 or 255000 (milliseconds),
+ - 0: disabled
+
+What: /sys/class/power_supply/rt9756-*/operation_mode
+Date: Dec 2025
+KernelVersion: 6.19
+Contact: ChiYuan Huang <cy_huang@richtek.com>
+Description:
+ This entry shows and set the operation mode when rt9756 charger
+ operates in charging phase. If 'bypass' mode is used, internal
+ path will connect vbus directly to vbat. Else, default 'div2'
+ mode for the switch-cap charging.
+
+ Access: Read, Write
+
+ Valid values:
+ - 'bypass' or 'div2'
diff --git a/Documentation/ABI/testing/sysfs-class-tee b/Documentation/ABI/testing/sysfs-class-tee
new file mode 100644
index 000000000000..c9144d16003e
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-tee
@@ -0,0 +1,15 @@
+What: /sys/class/tee/tee{,priv}X/rpmb_routing_model
+Date: May 2024
+KernelVersion: 6.10
+Contact: op-tee@lists.trustedfirmware.org
+Description:
+ RPMB frames can be routed to the RPMB device via the
+ user-space daemon tee-supplicant or the RPMB subsystem
+ in the kernel. The value "user" means that the driver
+ will route the RPMB frames via user space. Conversely,
+ "kernel" means that the frames are routed via the RPMB
+ subsystem without assistance from tee-supplicant. It
+ should be assumed that RPMB frames are routed via user
+ space if the variable is absent. The primary purpose
+ of this variable is to let systemd know whether
+ tee-supplicant is needed in the early boot with initramfs.
diff --git a/Documentation/ABI/testing/sysfs-class-tsm b/Documentation/ABI/testing/sysfs-class-tsm
new file mode 100644
index 000000000000..6fc1a5ac6da1
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-tsm
@@ -0,0 +1,19 @@
+What: /sys/class/tsm/tsmN
+Contact: linux-coco@lists.linux.dev
+Description:
+ "tsmN" is a device that represents the generic attributes of a
+ platform TEE Security Manager. It is typically a child of a
+ platform enumerated TSM device. /sys/class/tsm/tsmN/uevent
+ signals when the PCI layer is able to support establishment of
+ link encryption and other device-security features coordinated
+ through a platform tsm.
+
+What: /sys/class/tsm/tsmN/streamH.R.E
+Contact: linux-pci@vger.kernel.org
+Description:
+ (RO) When a host bridge has established a secure connection via
+ the platform TSM, symlink appears. The primary function of this
+ is have a system global review of TSM resource consumption
+ across host bridges. The link points to the endpoint PCI device
+ and matches the same link published by the host bridge. See
+ Documentation/ABI/testing/sysfs-devices-pci-host-bridge.
diff --git a/Documentation/ABI/testing/sysfs-class-typec b/Documentation/ABI/testing/sysfs-class-typec
index 281b995beb05..38e101c17a00 100644
--- a/Documentation/ABI/testing/sysfs-class-typec
+++ b/Documentation/ABI/testing/sysfs-class-typec
@@ -149,6 +149,19 @@ Description:
advertise to the partner. The currently used capabilities are in
brackets. Selection happens by writing to the file.
+What: /sys/class/typec/<port>/usb_capability
+Date: November 2024
+Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description: Lists the supported USB Modes. The default USB mode that is used
+ next time with the Enter_USB Message is in brackets. The default
+ mode can be changed by writing to the file when supported by the
+ driver.
+
+ Valid values:
+ - usb2 (USB 2.0)
+ - usb3 (USB 3.2)
+ - usb4 (USB4)
+
USB Type-C partner devices (eg. /sys/class/typec/port0-partner/)
What: /sys/class/typec/<port>-partner/accessory_mode
@@ -220,6 +233,20 @@ Description:
directory exists, it will have an attribute file for every VDO
in Discover Identity command result.
+What: /sys/class/typec/<port>-partner/usb_mode
+Date: November 2024
+Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description: The USB Modes that the partner device supports. The active mode
+ is displayed in brackets. The active USB mode can be changed by
+ writing to this file when the port driver is able to send Data
+ Reset Message to the partner. That requires USB Power Delivery
+ contract between the partner and the port.
+
+ Valid values:
+ - usb2 (USB 2.0)
+ - usb3 (USB 3.2)
+ - usb4 (USB4)
+
USB Type-C cable devices (eg. /sys/class/typec/port0-cable/)
Note: Electronically Marked Cables will have a device also for one cable plug
diff --git a/Documentation/ABI/testing/sysfs-class-usb_power_delivery b/Documentation/ABI/testing/sysfs-class-usb_power_delivery
index 61d233c320ea..c754458a527e 100644
--- a/Documentation/ABI/testing/sysfs-class-usb_power_delivery
+++ b/Documentation/ABI/testing/sysfs-class-usb_power_delivery
@@ -254,3 +254,31 @@ Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Description:
The PPS Power Limited bit indicates whether or not the source
supply will exceed the rated output power if requested.
+
+Standard Power Range (SPR) Adjustable Voltage Supplies
+
+What: /sys/class/usb_power_delivery/.../<capability>/<position>:spr_adjustable_voltage_supply
+Date: Oct 2025
+Contact: Badhri Jagan Sridharan <badhri@google.com>
+Description:
+ Adjustable Voltage Supply (AVS) Augmented PDO (APDO).
+
+What: /sys/class/usb_power_delivery/.../<capability>/<position>:spr_adjustable_voltage_supply/maximum_current_9V_to_15V
+Date: Oct 2025
+Contact: Badhri Jagan Sridharan <badhri@google.com>
+Description:
+ Maximum Current for 9V to 15V range in milliamperes.
+
+What: /sys/class/usb_power_delivery/.../<capability>/<position>:spr_adjustable_voltage_supply/maximum_current_15V_to_20V
+Date: Oct 2025
+Contact: Badhri Jagan Sridharan <badhri@google.com>
+Description:
+ Maximum Current for greater than 15V till 20V range in
+ milliamperes.
+
+What: /sys/class/usb_power_delivery/.../<capability>/<position>:spr_adjustable_voltage_supply/peak_current
+Date: Oct 2025
+Contact: Badhri Jagan Sridharan <badhri@google.com>
+Description:
+ This file shows the value of the Adjustable Voltage Supply Peak Current
+ Capability field.
diff --git a/Documentation/ABI/testing/sysfs-class-usb_role b/Documentation/ABI/testing/sysfs-class-usb_role
index 3b810a425a52..9fab3f06679e 100644
--- a/Documentation/ABI/testing/sysfs-class-usb_role
+++ b/Documentation/ABI/testing/sysfs-class-usb_role
@@ -19,3 +19,9 @@ Description:
- none
- host
- device
+
+What: /sys/class/usb_role/<switch>/connector
+Date: Feb 2024
+Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description:
+ Optional symlink to the USB Type-C connector.
diff --git a/Documentation/ABI/testing/sysfs-class-watchdog b/Documentation/ABI/testing/sysfs-class-watchdog
index 94fb74615951..70eabccf0557 100644
--- a/Documentation/ABI/testing/sysfs-class-watchdog
+++ b/Documentation/ABI/testing/sysfs-class-watchdog
@@ -76,7 +76,7 @@ Description:
timeout when the pretimeout interrupt is delivered. Pretimeout
is an optional feature.
-What: /sys/class/watchdog/watchdogn/pretimeout_avaialable_governors
+What: /sys/class/watchdog/watchdogn/pretimeout_available_governors
Date: February 2017
Contact: Wim Van Sebroeck <wim@iguana.be>
Description:
diff --git a/Documentation/ABI/testing/sysfs-devices-hisi_ptt b/Documentation/ABI/testing/sysfs-devices-hisi_ptt
deleted file mode 100644
index d7e206b4901c..000000000000
--- a/Documentation/ABI/testing/sysfs-devices-hisi_ptt
+++ /dev/null
@@ -1,113 +0,0 @@
-What: /sys/devices/hisi_ptt<sicl_id>_<core_id>/tune
-Date: October 2022
-KernelVersion: 6.1
-Contact: Yicong Yang <yangyicong@hisilicon.com>
-Description: This directory contains files for tuning the PCIe link
- parameters(events). Each file is named after the event
- of the PCIe link.
-
- See Documentation/trace/hisi-ptt.rst for more information.
-
-What: /sys/devices/hisi_ptt<sicl_id>_<core_id>/tune/qos_tx_cpl
-Date: October 2022
-KernelVersion: 6.1
-Contact: Yicong Yang <yangyicong@hisilicon.com>
-Description: (RW) Controls the weight of Tx completion TLPs, which influence
- the proportion of outbound completion TLPs on the PCIe link.
- The available tune data is [0, 1, 2]. Writing a negative value
- will return an error, and out of range values will be converted
- to 2. The value indicates a probable level of the event.
-
-What: /sys/devices/hisi_ptt<sicl_id>_<core_id>/tune/qos_tx_np
-Date: October 2022
-KernelVersion: 6.1
-Contact: Yicong Yang <yangyicong@hisilicon.com>
-Description: (RW) Controls the weight of Tx non-posted TLPs, which influence
- the proportion of outbound non-posted TLPs on the PCIe link.
- The available tune data is [0, 1, 2]. Writing a negative value
- will return an error, and out of range values will be converted
- to 2. The value indicates a probable level of the event.
-
-What: /sys/devices/hisi_ptt<sicl_id>_<core_id>/tune/qos_tx_p
-Date: October 2022
-KernelVersion: 6.1
-Contact: Yicong Yang <yangyicong@hisilicon.com>
-Description: (RW) Controls the weight of Tx posted TLPs, which influence the
- proportion of outbound posted TLPs on the PCIe link.
- The available tune data is [0, 1, 2]. Writing a negative value
- will return an error, and out of range values will be converted
- to 2. The value indicates a probable level of the event.
-
-What: /sys/devices/hisi_ptt<sicl_id>_<core_id>/tune/rx_alloc_buf_level
-Date: October 2022
-KernelVersion: 6.1
-Contact: Yicong Yang <yangyicong@hisilicon.com>
-Description: (RW) Control the allocated buffer watermark for inbound packets.
- The packets will be stored in the buffer first and then transmitted
- either when the watermark reached or when timed out.
- The available tune data is [0, 1, 2]. Writing a negative value
- will return an error, and out of range values will be converted
- to 2. The value indicates a probable level of the event.
-
-What: /sys/devices/hisi_ptt<sicl_id>_<core_id>/tune/tx_alloc_buf_level
-Date: October 2022
-KernelVersion: 6.1
-Contact: Yicong Yang <yangyicong@hisilicon.com>
-Description: (RW) Control the allocated buffer watermark of outbound packets.
- The packets will be stored in the buffer first and then transmitted
- either when the watermark reached or when timed out.
- The available tune data is [0, 1, 2]. Writing a negative value
- will return an error, and out of range values will be converted
- to 2. The value indicates a probable level of the event.
-
-What: /sys/devices/hisi_ptt<sicl_id>_<core_id>/root_port_filters
-Date: May 2023
-KernelVersion: 6.5
-Contact: Yicong Yang <yangyicong@hisilicon.com>
-Description: This directory contains the files providing the PCIe Root Port filters
- information used for PTT trace. Each file is named after the supported
- Root Port device name <domain>:<bus>:<device>.<function>.
-
- See the description of the "filter" in Documentation/trace/hisi-ptt.rst
- for more information.
-
-What: /sys/devices/hisi_ptt<sicl_id>_<core_id>/root_port_filters/multiselect
-Date: May 2023
-KernelVersion: 6.5
-Contact: Yicong Yang <yangyicong@hisilicon.com>
-Description: (Read) Indicates if this kind of filter can be selected at the same
- time as others filters, or must be used on it's own. 1 indicates
- the former case and 0 indicates the latter.
-
-What: /sys/devices/hisi_ptt<sicl_id>_<core_id>/root_port_filters/<bdf>
-Date: May 2023
-KernelVersion: 6.5
-Contact: Yicong Yang <yangyicong@hisilicon.com>
-Description: (Read) Indicates the filter value of this Root Port filter, which
- can be used to control the TLP headers to trace by the PTT trace.
-
-What: /sys/devices/hisi_ptt<sicl_id>_<core_id>/requester_filters
-Date: May 2023
-KernelVersion: 6.5
-Contact: Yicong Yang <yangyicong@hisilicon.com>
-Description: This directory contains the files providing the PCIe Requester filters
- information used for PTT trace. Each file is named after the supported
- Endpoint device name <domain>:<bus>:<device>.<function>.
-
- See the description of the "filter" in Documentation/trace/hisi-ptt.rst
- for more information.
-
-What: /sys/devices/hisi_ptt<sicl_id>_<core_id>/requester_filters/multiselect
-Date: May 2023
-KernelVersion: 6.5
-Contact: Yicong Yang <yangyicong@hisilicon.com>
-Description: (Read) Indicates if this kind of filter can be selected at the same
- time as others filters, or must be used on it's own. 1 indicates
- the former case and 0 indicates the latter.
-
-What: /sys/devices/hisi_ptt<sicl_id>_<core_id>/requester_filters/<bdf>
-Date: May 2023
-KernelVersion: 6.5
-Contact: Yicong Yang <yangyicong@hisilicon.com>
-Description: (Read) Indicates the filter value of this Requester filter, which
- can be used to control the TLP headers to trace by the PTT trace.
diff --git a/Documentation/ABI/testing/sysfs-devices-memory b/Documentation/ABI/testing/sysfs-devices-memory
index a95e0f17c35a..cec65827e602 100644
--- a/Documentation/ABI/testing/sysfs-devices-memory
+++ b/Documentation/ABI/testing/sysfs-devices-memory
@@ -115,6 +115,6 @@ What: /sys/devices/system/memory/crash_hotplug
Date: Aug 2023
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
Description:
- (RO) indicates whether or not the kernel directly supports
- modifying the crash elfcorehdr for memory hot un/plug and/or
- on/offline changes.
+ (RO) indicates whether or not the kernel updates relevant kexec
+ segments on memory hot un/plug and/or on/offline events, avoiding the
+ need to reload kdump kernel.
diff --git a/Documentation/ABI/testing/sysfs-devices-pci-host-bridge b/Documentation/ABI/testing/sysfs-devices-pci-host-bridge
new file mode 100644
index 000000000000..b91ec3450811
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-devices-pci-host-bridge
@@ -0,0 +1,45 @@
+What: /sys/devices/pciDDDD:BB
+ /sys/devices/.../pciDDDD:BB
+Contact: linux-pci@vger.kernel.org
+Description:
+ A PCI host bridge device parents a PCI bus device topology. PCI
+ controllers may also parent host bridges. The DDDD:BB format
+ conveys the PCI domain (ACPI segment) number and root bus number
+ (in hexadecimal) of the host bridge. Note that the domain number
+ may be larger than the 16-bits that the "DDDD" format implies
+ for emulated host-bridges.
+
+What: pciDDDD:BB/firmware_node
+Contact: linux-pci@vger.kernel.org
+Description:
+ (RO) Symlink to the platform firmware device object "companion"
+ of the host bridge. For example, an ACPI device with an _HID of
+ PNP0A08 (/sys/devices/LNXSYSTM:00/LNXSYBUS:00/PNP0A08:00). See
+ /sys/devices/pciDDDD:BB entry for details about the DDDD:BB
+ format.
+
+What: pciDDDD:BB/streamH.R.E
+Contact: linux-pci@vger.kernel.org
+Description:
+ (RO) When a platform has established a secure connection, PCIe
+ IDE, between two Partner Ports, this symlink appears. A stream
+ consumes a Stream ID slot in each of the Host bridge (H), Root
+ Port (R) and Endpoint (E). The link points to the Endpoint PCI
+ device in the Selective IDE Stream pairing. Specifically, "R"
+ and "E" represent the assigned Selective IDE Stream Register
+ Block in the Root Port and Endpoint, and "H" represents a
+ platform specific pool of stream resources shared by the Root
+ Ports in a host bridge. See /sys/devices/pciDDDD:BB entry for
+ details about the DDDD:BB format.
+
+What: pciDDDD:BB/available_secure_streams
+Contact: linux-pci@vger.kernel.org
+Description:
+ (RO) When a host bridge has Root Ports that support PCIe IDE
+ (link encryption and integrity protection) there may be a
+ limited number of Selective IDE Streams that can be used for
+ establishing new end-to-end secure links. This attribute
+ decrements upon secure link setup, and increments upon secure
+ link teardown. The in-use stream count is determined by counting
+ stream symlinks. See /sys/devices/pciDDDD:BB entry for details
+ about the DDDD:BB format.
diff --git a/Documentation/ABI/testing/sysfs-devices-platform-kunpeng_hccs b/Documentation/ABI/testing/sysfs-devices-platform-kunpeng_hccs
index 1666340820f7..d1b3a95a5518 100644
--- a/Documentation/ABI/testing/sysfs-devices-platform-kunpeng_hccs
+++ b/Documentation/ABI/testing/sysfs-devices-platform-kunpeng_hccs
@@ -79,3 +79,48 @@ Description:
indicates a lane.
crc_err_cnt: (RO) CRC err count on this port.
============= ==== =============================================
+
+What: /sys/devices/platform/HISI04Bx:00/used_types
+Date: August 2024
+KernelVersion: 6.12
+Contact: Huisong Li <lihuisong@huawei.com>
+Description:
+ This interface is used to show all HCCS types used on the
+ platform, like, HCCS-v1, HCCS-v2 and so on.
+
+What: /sys/devices/platform/HISI04Bx:00/available_inc_dec_lane_types
+What: /sys/devices/platform/HISI04Bx:00/dec_lane_of_type
+What: /sys/devices/platform/HISI04Bx:00/inc_lane_of_type
+Date: August 2024
+KernelVersion: 6.12
+Contact: Huisong Li <lihuisong@huawei.com>
+Description:
+ These interfaces under /sys/devices/platform/HISI04Bx/ are
+ used to support the low power consumption feature of some
+ HCCS types by changing the number of lanes used. The interfaces
+ changing the number of lanes used are 'dec_lane_of_type' and
+ 'inc_lane_of_type' which require root privileges. These
+ interfaces aren't exposed if no HCCS type on platform support
+ this feature. Please note that decreasing lane number is only
+ allowed if all the specified HCCS ports are not busy.
+
+ The low power consumption interfaces are as follows:
+
+ ============================= ==== ================================
+ available_inc_dec_lane_types: (RO) available HCCS types (string) to
+ increase and decrease the number
+ of lane used, e.g. HCCS-v2.
+ dec_lane_of_type: (WO) input HCCS type supported
+ decreasing lane to decrease the
+ used lane number of all specified
+ HCCS type ports on platform to
+ the minimum.
+ You can query the 'cur_lane_num'
+ to get the minimum lane number
+ after executing successfully.
+ inc_lane_of_type: (WO) input HCCS type supported
+ increasing lane to increase the
+ used lane number of all specified
+ HCCS type ports on platform to
+ the full lane state.
+ ============================= ==== ================================
diff --git a/Documentation/ABI/testing/sysfs-devices-power b/Documentation/ABI/testing/sysfs-devices-power
index 54195530e97a..9bf7c8a267c5 100644
--- a/Documentation/ABI/testing/sysfs-devices-power
+++ b/Documentation/ABI/testing/sysfs-devices-power
@@ -1,6 +1,6 @@
What: /sys/devices/.../power/
Date: January 2009
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
The /sys/devices/.../power directory contains attributes
allowing the user space to check and modify some power
@@ -8,7 +8,7 @@ Description:
What: /sys/devices/.../power/wakeup
Date: January 2009
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
The /sys/devices/.../power/wakeup attribute allows the user
space to check if the device is enabled to wake up the system
@@ -34,7 +34,7 @@ Description:
What: /sys/devices/.../power/control
Date: January 2009
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
The /sys/devices/.../power/control attribute allows the user
space to control the run-time power management of the device.
@@ -53,10 +53,10 @@ Description:
What: /sys/devices/.../power/async
Date: January 2009
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
The /sys/devices/.../async attribute allows the user space to
- enable or diasble the device's suspend and resume callbacks to
+ enable or disable the device's suspend and resume callbacks to
be executed asynchronously (ie. in separate threads, in parallel
with the main suspend/resume thread) during system-wide power
transitions (eg. suspend to RAM, hibernation).
@@ -79,7 +79,7 @@ Description:
What: /sys/devices/.../power/wakeup_count
Date: September 2010
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
The /sys/devices/.../wakeup_count attribute contains the number
of signaled wakeup events associated with the device. This
@@ -90,7 +90,7 @@ Description:
What: /sys/devices/.../power/wakeup_active_count
Date: September 2010
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
The /sys/devices/.../wakeup_active_count attribute contains the
number of times the processing of wakeup events associated with
@@ -102,7 +102,7 @@ Description:
What: /sys/devices/.../power/wakeup_abort_count
Date: February 2012
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
The /sys/devices/.../wakeup_abort_count attribute contains the
number of times the processing of a wakeup event associated with
@@ -114,7 +114,7 @@ Description:
What: /sys/devices/.../power/wakeup_expire_count
Date: February 2012
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
The /sys/devices/.../wakeup_expire_count attribute contains the
number of times a wakeup event associated with the device has
@@ -126,7 +126,7 @@ Description:
What: /sys/devices/.../power/wakeup_active
Date: September 2010
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
The /sys/devices/.../wakeup_active attribute contains either 1,
or 0, depending on whether or not a wakeup event associated with
@@ -138,7 +138,7 @@ Description:
What: /sys/devices/.../power/wakeup_total_time_ms
Date: September 2010
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
The /sys/devices/.../wakeup_total_time_ms attribute contains
the total time of processing wakeup events associated with the
@@ -149,7 +149,7 @@ Description:
What: /sys/devices/.../power/wakeup_max_time_ms
Date: September 2010
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
The /sys/devices/.../wakeup_max_time_ms attribute contains
the maximum time of processing a single wakeup event associated
@@ -161,7 +161,7 @@ Description:
What: /sys/devices/.../power/wakeup_last_time_ms
Date: September 2010
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
The /sys/devices/.../wakeup_last_time_ms attribute contains
the value of the monotonic clock corresponding to the time of
@@ -173,7 +173,7 @@ Description:
What: /sys/devices/.../power/wakeup_prevent_sleep_time_ms
Date: February 2012
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
The /sys/devices/.../wakeup_prevent_sleep_time_ms attribute
contains the total time the device has been preventing
@@ -203,7 +203,7 @@ Description:
What: /sys/devices/.../power/pm_qos_resume_latency_us
Date: March 2012
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
The /sys/devices/.../power/pm_qos_resume_latency_us attribute
contains the PM QoS resume latency limit for the given device,
@@ -223,7 +223,7 @@ Description:
What: /sys/devices/.../power/pm_qos_latency_tolerance_us
Date: January 2014
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
The /sys/devices/.../power/pm_qos_latency_tolerance_us attribute
contains the PM QoS active state latency tolerance limit for the
@@ -248,7 +248,7 @@ Description:
What: /sys/devices/.../power/pm_qos_no_power_off
Date: September 2012
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
The /sys/devices/.../power/pm_qos_no_power_off attribute
is used for manipulating the PM QoS "no power off" flag. If
@@ -263,7 +263,7 @@ Description:
What: /sys/devices/.../power/runtime_status
Date: April 2010
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
The /sys/devices/.../power/runtime_status attribute contains
the current runtime PM status of the device, which may be
@@ -274,15 +274,15 @@ What: /sys/devices/.../power/runtime_active_time
Date: Jul 2010
Contact: Arjan van de Ven <arjan@linux.intel.com>
Description:
- Reports the total time that the device has been active.
- Used for runtime PM statistics.
+ Reports the total time that the device has been active, in
+ milliseconds. Used for runtime PM statistics.
What: /sys/devices/.../power/runtime_suspended_time
Date: Jul 2010
Contact: Arjan van de Ven <arjan@linux.intel.com>
Description:
- Reports total time that the device has been suspended.
- Used for runtime PM statistics.
+ Reports total time that the device has been suspended, in
+ milliseconds. Used for runtime PM statistics.
What: /sys/devices/.../power/runtime_usage
Date: Apr 2010
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index a1db6db47505..3a05604c21bf 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -111,6 +111,7 @@ What: /sys/devices/system/cpu/cpuidle/available_governors
/sys/devices/system/cpu/cpuidle/current_driver
/sys/devices/system/cpu/cpuidle/current_governor
/sys/devices/system/cpu/cpuidle/current_governer_ro
+ /sys/devices/system/cpu/cpuidle/intel_c1_demotion
Date: September 2007
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
Description: Discover cpuidle policy and mechanism
@@ -132,7 +133,11 @@ Description: Discover cpuidle policy and mechanism
current_governor_ro: (RO) displays current idle policy.
- See Documentation/admin-guide/pm/cpuidle.rst and
+ intel_c1_demotion: (RW) enables/disables the C1 demotion
+ feature on Intel CPUs.
+
+ See Documentation/admin-guide/pm/cpuidle.rst,
+ Documentation/admin-guide/pm/intel_idle.rst, and
Documentation/driver-api/pm/cpuidle.rst for more information.
@@ -268,6 +273,60 @@ Description: Discover CPUs in the same CPU frequency coordination domain
This file is only present if the acpi-cpufreq or the cppc-cpufreq
drivers are in use.
+What: /sys/devices/system/cpu/cpuX/cpufreq/auto_select
+Date: May 2025
+Contact: linux-pm@vger.kernel.org
+Description: Autonomous selection enable
+
+ Read/write interface to control autonomous selection enable
+ Read returns autonomous selection status:
+ 0: autonomous selection is disabled
+ 1: autonomous selection is enabled
+
+ Write 'y' or '1' or 'on' to enable autonomous selection.
+ Write 'n' or '0' or 'off' to disable autonomous selection.
+
+ This file is only present if the cppc-cpufreq driver is in use.
+
+What: /sys/devices/system/cpu/cpuX/cpufreq/auto_act_window
+Date: May 2025
+Contact: linux-pm@vger.kernel.org
+Description: Autonomous activity window
+
+ This file indicates a moving utilization sensitivity window to
+ the platform's autonomous selection policy.
+
+ Read/write an integer represents autonomous activity window (in
+ microseconds) from/to this file. The max value to write is
+ 1270000000 but the max significand is 127. This means that if 128
+ is written to this file, 127 will be stored. If the value is
+ greater than 130, only the first two digits will be saved as
+ significand.
+
+ Writing a zero value to this file enable the platform to
+ determine an appropriate Activity Window depending on the workload.
+
+ Writing to this file only has meaning when Autonomous Selection is
+ enabled.
+
+ This file is only present if the cppc-cpufreq driver is in use.
+
+What: /sys/devices/system/cpu/cpuX/cpufreq/energy_performance_preference_val
+Date: May 2025
+Contact: linux-pm@vger.kernel.org
+Description: Energy performance preference
+
+ Read/write an 8-bit integer from/to this file. This file
+ represents a range of values from 0 (performance preference) to
+ 0xFF (energy efficiency preference) that influences the rate of
+ performance increase/decrease and the result of the hardware's
+ energy efficiency and performance optimization policies.
+
+ Writing to this file only has meaning when Autonomous Selection is
+ enabled.
+
+ This file is only present if the cppc-cpufreq driver is in use.
+
What: /sys/devices/system/cpu/cpu*/cache/index3/cache_disable_{0,1}
Date: August 2008
@@ -423,7 +482,7 @@ What: /sys/devices/system/cpu/cpuX/cpufreq/throttle_stats
/sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/occ_reset
Date: March 2016
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
- Linux for PowerPC mailing list <linuxppc-dev@ozlabs.org>
+ Linux for PowerPC mailing list <linuxppc-dev@lists.ozlabs.org>
Description: POWERNV CPUFreq driver's frequency throttle stats directory and
attributes
@@ -473,7 +532,7 @@ What: /sys/devices/system/cpu/cpufreq/policyX/throttle_stats
/sys/devices/system/cpu/cpufreq/policyX/throttle_stats/occ_reset
Date: March 2016
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
- Linux for PowerPC mailing list <linuxppc-dev@ozlabs.org>
+ Linux for PowerPC mailing list <linuxppc-dev@lists.ozlabs.org>
Description: POWERNV CPUFreq driver's frequency throttle stats directory and
attributes
@@ -485,6 +544,7 @@ What: /sys/devices/system/cpu/cpuX/regs/
/sys/devices/system/cpu/cpuX/regs/identification/
/sys/devices/system/cpu/cpuX/regs/identification/midr_el1
/sys/devices/system/cpu/cpuX/regs/identification/revidr_el1
+ /sys/devices/system/cpu/cpuX/regs/identification/aidr_el1
/sys/devices/system/cpu/cpuX/regs/identification/smidr_el1
Date: June 2016
Contact: Linux ARM Kernel Mailing list <linux-arm-kernel@lists.infradead.org>
@@ -511,17 +571,22 @@ Description: information about CPUs heterogeneity.
What: /sys/devices/system/cpu/vulnerabilities
/sys/devices/system/cpu/vulnerabilities/gather_data_sampling
+ /sys/devices/system/cpu/vulnerabilities/indirect_target_selection
/sys/devices/system/cpu/vulnerabilities/itlb_multihit
/sys/devices/system/cpu/vulnerabilities/l1tf
/sys/devices/system/cpu/vulnerabilities/mds
/sys/devices/system/cpu/vulnerabilities/meltdown
/sys/devices/system/cpu/vulnerabilities/mmio_stale_data
+ /sys/devices/system/cpu/vulnerabilities/old_microcode
+ /sys/devices/system/cpu/vulnerabilities/reg_file_data_sampling
/sys/devices/system/cpu/vulnerabilities/retbleed
/sys/devices/system/cpu/vulnerabilities/spec_store_bypass
/sys/devices/system/cpu/vulnerabilities/spectre_v1
/sys/devices/system/cpu/vulnerabilities/spectre_v2
/sys/devices/system/cpu/vulnerabilities/srbds
+ /sys/devices/system/cpu/vulnerabilities/tsa
/sys/devices/system/cpu/vulnerabilities/tsx_async_abort
+ /sys/devices/system/cpu/vulnerabilities/vmscape
Date: January 2018
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
Description: Information about CPU vulnerabilities
@@ -561,7 +626,8 @@ Description: Control Symmetric Multi Threading (SMT)
================ =========================================
If control status is "forceoff" or "notsupported" writes
- are rejected.
+ are rejected. Note that enabling SMT on PowerPC skips
+ offline cores.
What: /sys/devices/system/cpu/cpuX/power/energy_perf_bias
Date: March 2019
@@ -604,10 +670,22 @@ Description: Umwait control
Note that a value of zero means there is no limit.
Low order two bits must be zero.
+What: /sys/devices/system/cpu/sev
+ /sys/devices/system/cpu/sev/vmpl
+Date: May 2024
+Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
+Description: Secure Encrypted Virtualization (SEV) information
+
+ This directory is only present when running as an SEV-SNP guest.
+
+ vmpl: Reports the Virtual Machine Privilege Level (VMPL) at which
+ the SEV-SNP guest is running.
+
+
What: /sys/devices/system/cpu/svm
Date: August 2019
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
- Linux for PowerPC mailing list <linuxppc-dev@ozlabs.org>
+ Linux for PowerPC mailing list <linuxppc-dev@lists.ozlabs.org>
Description: Secure Virtual Machine
If 1, it means the system is using the Protected Execution
@@ -616,7 +694,7 @@ Description: Secure Virtual Machine
What: /sys/devices/system/cpu/cpuX/purr
Date: Apr 2005
-Contact: Linux for PowerPC mailing list <linuxppc-dev@ozlabs.org>
+Contact: Linux for PowerPC mailing list <linuxppc-dev@lists.ozlabs.org>
Description: PURR ticks for this CPU since the system boot.
The Processor Utilization Resources Register (PURR) is
@@ -627,7 +705,7 @@ Description: PURR ticks for this CPU since the system boot.
What: /sys/devices/system/cpu/cpuX/spurr
Date: Dec 2006
-Contact: Linux for PowerPC mailing list <linuxppc-dev@ozlabs.org>
+Contact: Linux for PowerPC mailing list <linuxppc-dev@lists.ozlabs.org>
Description: SPURR ticks for this CPU since the system boot.
The Scaled Processor Utilization Resources Register
@@ -639,7 +717,7 @@ Description: SPURR ticks for this CPU since the system boot.
What: /sys/devices/system/cpu/cpuX/idle_purr
Date: Apr 2020
-Contact: Linux for PowerPC mailing list <linuxppc-dev@ozlabs.org>
+Contact: Linux for PowerPC mailing list <linuxppc-dev@lists.ozlabs.org>
Description: PURR ticks for cpuX when it was idle.
This sysfs interface exposes the number of PURR ticks
@@ -647,7 +725,7 @@ Description: PURR ticks for cpuX when it was idle.
What: /sys/devices/system/cpu/cpuX/idle_spurr
Date: Apr 2020
-Contact: Linux for PowerPC mailing list <linuxppc-dev@ozlabs.org>
+Contact: Linux for PowerPC mailing list <linuxppc-dev@lists.ozlabs.org>
Description: SPURR ticks for cpuX when it was idle.
This sysfs interface exposes the number of SPURR ticks
@@ -686,10 +764,27 @@ Description:
participate in load balancing. These CPUs are set by
boot parameter "isolcpus=".
+What: /sys/devices/system/cpu/housekeeping
+Date: Oct 2025
+Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
+Description:
+ (RO) the list of logical CPUs that are designated by the kernel as
+ "housekeeping". Each CPU are responsible for handling essential
+ system-wide background tasks, including RCU callbacks, delayed
+ timer callbacks, and unbound workqueues, minimizing scheduling
+ jitter on low-latency, isolated CPUs. These CPUs are set when boot
+ parameter "isolcpus=nohz" or "nohz_full=" is specified.
+
What: /sys/devices/system/cpu/crash_hotplug
Date: Aug 2023
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
Description:
- (RO) indicates whether or not the kernel directly supports
- modifying the crash elfcorehdr for CPU hot un/plug and/or
- on/offline changes.
+ (RO) indicates whether or not the kernel updates relevant kexec
+ segments on memory hot un/plug and/or on/offline events, avoiding the
+ need to reload kdump kernel.
+
+What: /sys/devices/system/cpu/enabled
+Date: Nov 2022
+Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
+Description:
+ (RO) the list of CPUs that can be brought online.
diff --git a/Documentation/ABI/testing/sysfs-devices-virtual-misc-tdx_guest b/Documentation/ABI/testing/sysfs-devices-virtual-misc-tdx_guest
new file mode 100644
index 000000000000..8fca56c8c9df
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-devices-virtual-misc-tdx_guest
@@ -0,0 +1,63 @@
+What: /sys/devices/virtual/misc/tdx_guest/measurements/MRNAME[:HASH]
+Date: April, 2025
+KernelVersion: v6.16
+Contact: linux-coco@lists.linux.dev
+Description:
+ Value of a TDX measurement register (MR). MRNAME and HASH above
+ are placeholders. The optional suffix :HASH is used for MRs
+ that have associated hash algorithms. See below for a complete
+ list of TDX MRs exposed via sysfs. Refer to Intel TDX Module
+ ABI Specification for the definition of TDREPORT and the full
+ list of TDX measurements.
+
+ Intel TDX Module ABI Specification can be found at:
+ https://www.intel.com/content/www/us/en/developer/tools/trust-domain-extensions/documentation.html#architecture
+
+ See also:
+ https://docs.kernel.org/driver-api/coco/measurement-registers.html
+
+What: /sys/devices/virtual/misc/tdx_guest/measurements/mrconfigid
+Date: April, 2025
+KernelVersion: v6.16
+Contact: linux-coco@lists.linux.dev
+Description:
+ (RO) MRCONFIGID - 48-byte immutable storage typically used for
+ software-defined ID for non-owner-defined configuration of the
+ guest TD – e.g., run-time or OS configuration.
+
+What: /sys/devices/virtual/misc/tdx_guest/measurements/mrowner
+Date: April, 2025
+KernelVersion: v6.16
+Contact: linux-coco@lists.linux.dev
+Description:
+ (RO) MROWNER - 48-byte immutable storage typically used for
+ software-defined ID for the guest TD’s owner.
+
+What: /sys/devices/virtual/misc/tdx_guest/measurements/mrownerconfig
+Date: April, 2025
+KernelVersion: v6.16
+Contact: linux-coco@lists.linux.dev
+Description:
+ (RO) MROWNERCONFIG - 48-byte immutable storage typically used
+ for software-defined ID for owner-defined configuration of the
+ guest TD – e.g., specific to the workload rather than the
+ run-time or OS.
+
+What: /sys/devices/virtual/misc/tdx_guest/measurements/mrtd:sha384
+Date: April, 2025
+KernelVersion: v6.16
+Contact: linux-coco@lists.linux.dev
+Description:
+ (RO) MRTD - Measurement of the initial contents of the TD.
+
+What: /sys/devices/virtual/misc/tdx_guest/measurements/rtmr[0123]:sha384
+Date: April, 2025
+KernelVersion: v6.16
+Contact: linux-coco@lists.linux.dev
+Description:
+ (RW) RTMR[0123] - 4 Run-Time extendable Measurement Registers.
+ Read from any of these returns the current value of the
+ corresponding RTMR. Write extends the written buffer to the
+ RTMR. All writes must start at offset 0 and be 48 bytes in
+ size. Partial writes will result in EINVAL returned by the
+ write() syscall.
diff --git a/Documentation/ABI/testing/sysfs-driver-amd-sfh b/Documentation/ABI/testing/sysfs-driver-amd-sfh
new file mode 100644
index 000000000000..c053126a83bb
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-amd-sfh
@@ -0,0 +1,13 @@
+What: /sys/bus/pci/drivers/pcie_mp2_amd/*/hpd
+Date: April 2025
+Contact: mario.limonciello@amd.com
+Description:
+ Human presence detection (HPD) enable/disable.
+ When HPD is enabled, the device will be able to detect the
+ presence of a human and will send an interrupt that can be
+ used to wake the system from a low power state.
+ When HPD is disabled, the device will not be able to detect
+ the presence of a human.
+
+ Access: Read/Write
+ Valid values: enabled/disabled
diff --git a/Documentation/ABI/testing/sysfs-driver-framer-pef2256 b/Documentation/ABI/testing/sysfs-driver-framer-pef2256
new file mode 100644
index 000000000000..29f97783bf07
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-framer-pef2256
@@ -0,0 +1,8 @@
+What: /sys/bus/platform/devices/xxx/version
+Date: Sep 2025
+Contact: netdev@vger.kernel.org
+Description: Reports the version of the PEF2256 framer
+
+ Access: Read
+
+ Valid values: Represented as string
diff --git a/Documentation/ABI/testing/sysfs-driver-hid-appletb-kbd b/Documentation/ABI/testing/sysfs-driver-hid-appletb-kbd
new file mode 100644
index 000000000000..8c9718d83e9d
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-hid-appletb-kbd
@@ -0,0 +1,13 @@
+What: /sys/bus/hid/drivers/hid-appletb-kbd/<dev>/mode
+Date: March, 2025
+KernelVersion: 6.15
+Contact: linux-input@vger.kernel.org
+Description:
+ The set of keys displayed on the Touch Bar.
+ Valid values are:
+ == =================
+ 0 Escape key only
+ 1 Function keys
+ 2 Media/brightness keys
+ 3 None
+ == =================
diff --git a/Documentation/ABI/testing/sysfs-driver-hid-corsair-void b/Documentation/ABI/testing/sysfs-driver-hid-corsair-void
new file mode 100644
index 000000000000..83fa625c0025
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-hid-corsair-void
@@ -0,0 +1,38 @@
+What: /sys/bus/hid/drivers/hid-corsair-void/<dev>/fw_version_headset
+Date: January 2024
+KernelVersion: 6.13
+Contact: Stuart Hayhurst <stuart.a.hayhurst@gmail.com>
+Description: (R) The firmware version of the headset
+ * Returns -ENODATA if no version was reported
+
+What: /sys/bus/hid/drivers/hid-corsair-void/<dev>/fw_version_receiver
+Date: January 2024
+KernelVersion: 6.13
+Contact: Stuart Hayhurst <stuart.a.hayhurst@gmail.com>
+Description: (R) The firmware version of the receiver
+
+What: /sys/bus/hid/drivers/hid-corsair-void/<dev>/microphone_up
+Date: July 2023
+KernelVersion: 6.13
+Contact: Stuart Hayhurst <stuart.a.hayhurst@gmail.com>
+Description: (R) Get the physical position of the microphone
+ * 1 -> Microphone up
+ * 0 -> Microphone down
+
+What: /sys/bus/hid/drivers/hid-corsair-void/<dev>/send_alert
+Date: July 2023
+KernelVersion: 6.13
+Contact: Stuart Hayhurst <stuart.a.hayhurst@gmail.com>
+Description: (W) Play a built-in notification from the headset (0 / 1)
+
+What: /sys/bus/hid/drivers/hid-corsair-void/<dev>/set_sidetone
+Date: December 2023
+KernelVersion: 6.13
+Contact: Stuart Hayhurst <stuart.a.hayhurst@gmail.com>
+Description: (W) Set the sidetone volume (0 - sidetone_max)
+
+What: /sys/bus/hid/drivers/hid-corsair-void/<dev>/sidetone_max
+Date: July 2024
+KernelVersion: 6.13
+Contact: Stuart Hayhurst <stuart.a.hayhurst@gmail.com>
+Description: (R) Report the maximum sidetone volume
diff --git a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
index 8d7d8f05f6cd..a885e5316d02 100644
--- a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
+++ b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
@@ -1,4 +1,4 @@
-What: /sys/devices/.../hwmon/hwmon<i>/in0_input
+What: /sys/bus/pci/drivers/i915/.../hwmon/hwmon<i>/in0_input
Date: February 2023
KernelVersion: 6.2
Contact: intel-gfx@lists.freedesktop.org
@@ -6,7 +6,7 @@ Description: RO. Current Voltage in millivolt.
Only supported for particular Intel i915 graphics platforms.
-What: /sys/devices/.../hwmon/hwmon<i>/power1_max
+What: /sys/bus/pci/drivers/i915/.../hwmon/hwmon<i>/power1_max
Date: February 2023
KernelVersion: 6.2
Contact: intel-gfx@lists.freedesktop.org
@@ -20,7 +20,7 @@ Description: RW. Card reactive sustained (PL1/Tau) power limit in microwatts.
Only supported for particular Intel i915 graphics platforms.
-What: /sys/devices/.../hwmon/hwmon<i>/power1_rated_max
+What: /sys/bus/pci/drivers/i915/.../hwmon/hwmon<i>/power1_rated_max
Date: February 2023
KernelVersion: 6.2
Contact: intel-gfx@lists.freedesktop.org
@@ -28,7 +28,7 @@ Description: RO. Card default power limit (default TDP setting).
Only supported for particular Intel i915 graphics platforms.
-What: /sys/devices/.../hwmon/hwmon<i>/power1_max_interval
+What: /sys/bus/pci/drivers/i915/.../hwmon/hwmon<i>/power1_max_interval
Date: February 2023
KernelVersion: 6.2
Contact: intel-gfx@lists.freedesktop.org
@@ -37,7 +37,7 @@ Description: RW. Sustained power limit interval (Tau in PL1/Tau) in
Only supported for particular Intel i915 graphics platforms.
-What: /sys/devices/.../hwmon/hwmon<i>/power1_crit
+What: /sys/bus/pci/drivers/i915/.../hwmon/hwmon<i>/power1_crit
Date: February 2023
KernelVersion: 6.2
Contact: intel-gfx@lists.freedesktop.org
@@ -50,7 +50,7 @@ Description: RW. Card reactive critical (I1) power limit in microwatts.
Only supported for particular Intel i915 graphics platforms.
-What: /sys/devices/.../hwmon/hwmon<i>/curr1_crit
+What: /sys/bus/pci/drivers/i915/.../hwmon/hwmon<i>/curr1_crit
Date: February 2023
KernelVersion: 6.2
Contact: intel-gfx@lists.freedesktop.org
@@ -63,7 +63,7 @@ Description: RW. Card reactive critical (I1) power limit in milliamperes.
Only supported for particular Intel i915 graphics platforms.
-What: /sys/devices/.../hwmon/hwmon<i>/energy1_input
+What: /sys/bus/pci/drivers/i915/.../hwmon/hwmon<i>/energy1_input
Date: February 2023
KernelVersion: 6.2
Contact: intel-gfx@lists.freedesktop.org
@@ -75,3 +75,19 @@ Description: RO. Energy input of device or gt in microjoules.
for the gt.
Only supported for particular Intel i915 graphics platforms.
+
+What: /sys/bus/pci/drivers/i915/.../hwmon/hwmon<i>/fan1_input
+Date: November 2024
+KernelVersion: 6.12
+Contact: intel-gfx@lists.freedesktop.org
+Description: RO. Fan speed of device in RPM.
+
+ Only supported for particular Intel i915 graphics platforms.
+
+What: /sys/bus/pci/drivers/i915/.../hwmon/hwmon<i>/temp1_input
+Date: November 2024
+KernelVersion: 6.12
+Contact: intel-gfx@lists.freedesktop.org
+Description: RO. GPU package temperature in millidegree Celsius.
+
+ Only supported for particular Intel i915 graphics platforms.
diff --git a/Documentation/ABI/testing/sysfs-driver-intel-m10-bmc b/Documentation/ABI/testing/sysfs-driver-intel-m10-bmc
index c12316dfd973..faeae8fedb14 100644
--- a/Documentation/ABI/testing/sysfs-driver-intel-m10-bmc
+++ b/Documentation/ABI/testing/sysfs-driver-intel-m10-bmc
@@ -17,7 +17,7 @@ Description: Read only. Returns the firmware version of Intel MAX10
What: /sys/bus/.../drivers/intel-m10-bmc/.../mac_address
Date: January 2021
KernelVersion: 5.12
-Contact: Peter Colberg <peter.colberg@intel.com>
+Contact: Matthew Gerlach <matthew.gerlach@altera.com>
Description: Read only. Returns the first MAC address in a block
of sequential MAC addresses assigned to the board
that is managed by the Intel MAX10 BMC. It is stored in
@@ -28,7 +28,7 @@ Description: Read only. Returns the first MAC address in a block
What: /sys/bus/.../drivers/intel-m10-bmc/.../mac_count
Date: January 2021
KernelVersion: 5.12
-Contact: Peter Colberg <peter.colberg@intel.com>
+Contact: Matthew Gerlach <matthew.gerlach@altera.com>
Description: Read only. Returns the number of sequential MAC
addresses assigned to the board managed by the Intel
MAX10 BMC. This value is stored in FLASH and is mirrored
diff --git a/Documentation/ABI/testing/sysfs-driver-intel-m10-bmc-sec-update b/Documentation/ABI/testing/sysfs-driver-intel-m10-bmc-sec-update
index 9051695d2211..3a6ca780c75c 100644
--- a/Documentation/ABI/testing/sysfs-driver-intel-m10-bmc-sec-update
+++ b/Documentation/ABI/testing/sysfs-driver-intel-m10-bmc-sec-update
@@ -1,7 +1,7 @@
What: /sys/bus/platform/drivers/intel-m10bmc-sec-update/.../security/sr_root_entry_hash
Date: Sep 2022
KernelVersion: 5.20
-Contact: Peter Colberg <peter.colberg@intel.com>
+Contact: Matthew Gerlach <matthew.gerlach@altera.com>
Description: Read only. Returns the root entry hash for the static
region if one is programmed, else it returns the
string: "hash not programmed". This file is only
@@ -11,7 +11,7 @@ Description: Read only. Returns the root entry hash for the static
What: /sys/bus/platform/drivers/intel-m10bmc-sec-update/.../security/pr_root_entry_hash
Date: Sep 2022
KernelVersion: 5.20
-Contact: Peter Colberg <peter.colberg@intel.com>
+Contact: Matthew Gerlach <matthew.gerlach@altera.com>
Description: Read only. Returns the root entry hash for the partial
reconfiguration region if one is programmed, else it
returns the string: "hash not programmed". This file
@@ -21,7 +21,7 @@ Description: Read only. Returns the root entry hash for the partial
What: /sys/bus/platform/drivers/intel-m10bmc-sec-update/.../security/bmc_root_entry_hash
Date: Sep 2022
KernelVersion: 5.20
-Contact: Peter Colberg <peter.colberg@intel.com>
+Contact: Matthew Gerlach <matthew.gerlach@altera.com>
Description: Read only. Returns the root entry hash for the BMC image
if one is programmed, else it returns the string:
"hash not programmed". This file is only visible if the
@@ -31,7 +31,7 @@ Description: Read only. Returns the root entry hash for the BMC image
What: /sys/bus/platform/drivers/intel-m10bmc-sec-update/.../security/sr_canceled_csks
Date: Sep 2022
KernelVersion: 5.20
-Contact: Peter Colberg <peter.colberg@intel.com>
+Contact: Matthew Gerlach <matthew.gerlach@altera.com>
Description: Read only. Returns a list of indices for canceled code
signing keys for the static region. The standard bitmap
list format is used (e.g. "1,2-6,9").
@@ -39,7 +39,7 @@ Description: Read only. Returns a list of indices for canceled code
What: /sys/bus/platform/drivers/intel-m10bmc-sec-update/.../security/pr_canceled_csks
Date: Sep 2022
KernelVersion: 5.20
-Contact: Peter Colberg <peter.colberg@intel.com>
+Contact: Matthew Gerlach <matthew.gerlach@altera.com>
Description: Read only. Returns a list of indices for canceled code
signing keys for the partial reconfiguration region. The
standard bitmap list format is used (e.g. "1,2-6,9").
@@ -47,7 +47,7 @@ Description: Read only. Returns a list of indices for canceled code
What: /sys/bus/platform/drivers/intel-m10bmc-sec-update/.../security/bmc_canceled_csks
Date: Sep 2022
KernelVersion: 5.20
-Contact: Peter Colberg <peter.colberg@intel.com>
+Contact: Matthew Gerlach <matthew.gerlach@altera.com>
Description: Read only. Returns a list of indices for canceled code
signing keys for the BMC. The standard bitmap list format
is used (e.g. "1,2-6,9").
@@ -55,7 +55,7 @@ Description: Read only. Returns a list of indices for canceled code
What: /sys/bus/platform/drivers/intel-m10bmc-sec-update/.../security/flash_count
Date: Sep 2022
KernelVersion: 5.20
-Contact: Peter Colberg <peter.colberg@intel.com>
+Contact: Matthew Gerlach <matthew.gerlach@altera.com>
Description: Read only. Returns number of times the secure update
staging area has been flashed.
Format: "%u".
diff --git a/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon
index 8c321bc9dc04..d9e2b17c6872 100644
--- a/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon
+++ b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon
@@ -1,4 +1,4 @@
-What: /sys/devices/.../hwmon/hwmon<i>/power1_max
+What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/power1_max
Date: September 2023
KernelVersion: 6.5
Contact: intel-xe@lists.freedesktop.org
@@ -10,20 +10,60 @@ Description: RW. Card reactive sustained (PL1) power limit in microwatts.
power limit is disabled, writing 0 disables the
limit. Writing values > 0 and <= TDP will enable the power limit.
- Only supported for particular Intel xe graphics platforms.
+ Only supported for particular Intel Xe graphics platforms.
-What: /sys/devices/.../hwmon/hwmon<i>/power1_rated_max
+What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/power1_rated_max
Date: September 2023
KernelVersion: 6.5
Contact: intel-xe@lists.freedesktop.org
Description: RO. Card default power limit (default TDP setting).
- Only supported for particular Intel xe graphics platforms.
+ Only supported for particular Intel Xe graphics platforms.
-What: /sys/devices/.../hwmon/hwmon<i>/power1_crit
+
+What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/energy1_input
Date: September 2023
KernelVersion: 6.5
Contact: intel-xe@lists.freedesktop.org
+Description: RO. Card energy input of device in microjoules.
+
+ Only supported for particular Intel Xe graphics platforms.
+
+What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/power1_max_interval
+Date: October 2023
+KernelVersion: 6.6
+Contact: intel-xe@lists.freedesktop.org
+Description: RW. Card sustained power limit interval (Tau in PL1/Tau) in
+ milliseconds over which sustained power is averaged.
+
+ Only supported for particular Intel Xe graphics platforms.
+
+What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/power2_max
+Date: February 2024
+KernelVersion: 6.8
+Contact: intel-xe@lists.freedesktop.org
+Description: RW. Package reactive sustained (PL1) power limit in microwatts.
+
+ The power controller will throttle the operating frequency
+ if the power averaged over a window (typically seconds)
+ exceeds this limit. A read value of 0 means that the PL1
+ power limit is disabled, writing 0 disables the
+ limit. Writing values > 0 and <= TDP will enable the power limit.
+
+ Only supported for particular Intel Xe graphics platforms.
+
+What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/power2_rated_max
+Date: February 2024
+KernelVersion: 6.8
+Contact: intel-xe@lists.freedesktop.org
+Description: RO. Package default power limit (default TDP setting).
+
+ Only supported for particular Intel Xe graphics platforms.
+
+What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/power1_crit
+Date: May 2025
+KernelVersion: 6.15
+Contact: intel-xe@lists.freedesktop.org
Description: RW. Card reactive critical (I1) power limit in microwatts.
Card reactive critical (I1) power limit in microwatts is exposed
@@ -31,11 +71,11 @@ Description: RW. Card reactive critical (I1) power limit in microwatts.
operating frequency if the power averaged over a window exceeds
this limit.
- Only supported for particular Intel xe graphics platforms.
+ Only supported for particular Intel Xe graphics platforms.
-What: /sys/devices/.../hwmon/hwmon<i>/curr1_crit
-Date: September 2023
-KernelVersion: 6.5
+What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/curr1_crit
+Date: May 2025
+KernelVersion: 6.15
Contact: intel-xe@lists.freedesktop.org
Description: RW. Card reactive critical (I1) power limit in milliamperes.
@@ -44,27 +84,115 @@ Description: RW. Card reactive critical (I1) power limit in milliamperes.
the operating frequency if the power averaged over a window
exceeds this limit.
-What: /sys/devices/.../hwmon/hwmon<i>/in0_input
-Date: September 2023
-KernelVersion: 6.5
+What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/energy2_input
+Date: February 2024
+KernelVersion: 6.8
Contact: intel-xe@lists.freedesktop.org
-Description: RO. Current Voltage in millivolt.
+Description: RO. Package energy input of device in microjoules.
- Only supported for particular Intel xe graphics platforms.
+ Only supported for particular Intel Xe graphics platforms.
-What: /sys/devices/.../hwmon/hwmon<i>/energy1_input
-Date: September 2023
-KernelVersion: 6.5
+What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/power2_max_interval
+Date: February 2024
+KernelVersion: 6.8
Contact: intel-xe@lists.freedesktop.org
-Description: RO. Energy input of device in microjoules.
+Description: RW. Package sustained power limit interval (Tau in PL1/Tau) in
+ milliseconds over which sustained power is averaged.
- Only supported for particular Intel xe graphics platforms.
+ Only supported for particular Intel Xe graphics platforms.
-What: /sys/devices/.../hwmon/hwmon<i>/power1_max_interval
-Date: October 2023
-KernelVersion: 6.6
+What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/in1_input
+Date: February 2024
+KernelVersion: 6.8
+Contact: intel-xe@lists.freedesktop.org
+Description: RO. Package current voltage in millivolt.
+
+ Only supported for particular Intel Xe graphics platforms.
+
+What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/temp2_input
+Date: March 2025
+KernelVersion: 6.15
+Contact: intel-xe@lists.freedesktop.org
+Description: RO. Package temperature in millidegree Celsius.
+
+ Only supported for particular Intel Xe graphics platforms.
+
+What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/temp3_input
+Date: March 2025
+KernelVersion: 6.15
+Contact: intel-xe@lists.freedesktop.org
+Description: RO. VRAM temperature in millidegree Celsius.
+
+ Only supported for particular Intel Xe graphics platforms.
+
+What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/fan1_input
+Date: March 2025
+KernelVersion: 6.16
+Contact: intel-xe@lists.freedesktop.org
+Description: RO. Fan 1 speed in RPM.
+
+ Only supported for particular Intel Xe graphics platforms.
+
+What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/fan2_input
+Date: March 2025
+KernelVersion: 6.16
+Contact: intel-xe@lists.freedesktop.org
+Description: RO. Fan 2 speed in RPM.
+
+ Only supported for particular Intel Xe graphics platforms.
+
+What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/fan3_input
+Date: March 2025
+KernelVersion: 6.16
+Contact: intel-xe@lists.freedesktop.org
+Description: RO. Fan 3 speed in RPM.
+
+ Only supported for particular Intel Xe graphics platforms.
+
+What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/power1_cap
+Date: May 2025
+KernelVersion: 6.15
+Contact: intel-xe@lists.freedesktop.org
+Description: RW. Card burst (PL2) power limit in microwatts.
+
+ The power controller will throttle the operating frequency
+ if the power averaged over a window (typically milli seconds)
+ exceeds this limit. A read value of 0 means that the PL2
+ power limit is disabled, writing 0 disables the limit.
+ PL2 is greater than PL1 and its time window is lesser
+ compared to PL1.
+
+ Only supported for particular Intel Xe graphics platforms.
+
+What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/power2_cap
+Date: May 2025
+KernelVersion: 6.15
+Contact: intel-xe@lists.freedesktop.org
+Description: RW. Package burst (PL2) power limit in microwatts.
+
+ The power controller will throttle the operating frequency
+ if the power averaged over a window (typically milli seconds)
+ exceeds this limit. A read value of 0 means that the PL2
+ power limit is disabled, writing 0 disables the limit.
+ PL2 is greater than PL1 and its time window is lesser
+ compared to PL1.
+
+ Only supported for particular Intel Xe graphics platforms.
+
+What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/power1_cap_interval
+Date: May 2025
+KernelVersion: 6.15
+Contact: intel-xe@lists.freedesktop.org
+Description: RW. Card burst power limit interval (Tau in PL2/Tau) in
+ milliseconds over which sustained power is averaged.
+
+ Only supported for particular Intel Xe graphics platforms.
+
+What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/power2_cap_interval
+Date: May 2025
+KernelVersion: 6.15
Contact: intel-xe@lists.freedesktop.org
-Description: RW. Sustained power limit interval (Tau in PL1/Tau) in
+Description: RW. Package burst power limit interval (Tau in PL2/Tau) in
milliseconds over which sustained power is averaged.
- Only supported for particular Intel xe graphics platforms.
+ Only supported for particular Intel Xe graphics platforms.
diff --git a/Documentation/ABI/testing/sysfs-driver-intel-xe-sriov b/Documentation/ABI/testing/sysfs-driver-intel-xe-sriov
new file mode 100644
index 000000000000..2fd7e9b7bacc
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-intel-xe-sriov
@@ -0,0 +1,159 @@
+What: /sys/bus/pci/drivers/xe/.../sriov_admin/
+Date: October 2025
+KernelVersion: 6.19
+Contact: intel-xe@lists.freedesktop.org
+Description:
+ This directory appears for the particular Intel Xe device when:
+
+ - device supports SR-IOV, and
+ - device is a Physical Function (PF), and
+ - driver support for the SR-IOV PF is enabled on given device.
+
+ This directory is used as a root for all attributes required to
+ manage both Physical Function (PF) and Virtual Functions (VFs).
+
+
+What: /sys/bus/pci/drivers/xe/.../sriov_admin/pf/
+Date: October 2025
+KernelVersion: 6.19
+Contact: intel-xe@lists.freedesktop.org
+Description:
+ This directory holds attributes related to the SR-IOV Physical
+ Function (PF).
+
+
+What: /sys/bus/pci/drivers/xe/.../sriov_admin/vf1/
+What: /sys/bus/pci/drivers/xe/.../sriov_admin/vf2/
+What: /sys/bus/pci/drivers/xe/.../sriov_admin/vf<N>/
+Date: October 2025
+KernelVersion: 6.19
+Contact: intel-xe@lists.freedesktop.org
+Description:
+ These directories hold attributes related to the SR-IOV Virtual
+ Functions (VFs).
+
+ Note that the VF number <N> is 1-based as described in PCI SR-IOV
+ specification as the Xe driver follows that naming schema.
+
+ There could be "vf1", "vf2" and so on, up to "vf<N>", where <N>
+ matches the value of the "sriov_totalvfs" attribute.
+
+
+What: /sys/bus/pci/drivers/xe/.../sriov_admin/pf/profile/exec_quantum_ms
+What: /sys/bus/pci/drivers/xe/.../sriov_admin/pf/profile/preempt_timeout_us
+What: /sys/bus/pci/drivers/xe/.../sriov_admin/pf/profile/sched_priority
+What: /sys/bus/pci/drivers/xe/.../sriov_admin/vf<n>/profile/exec_quantum_ms
+What: /sys/bus/pci/drivers/xe/.../sriov_admin/vf<n>/profile/preempt_timeout_us
+What: /sys/bus/pci/drivers/xe/.../sriov_admin/vf<n>/profile/sched_priority
+Date: October 2025
+KernelVersion: 6.19
+Contact: intel-xe@lists.freedesktop.org
+Description:
+ These files expose scheduling parameters for the PF and its VFs, and
+ are visible only on Intel Xe platforms that use time-sliced GPU sharing.
+ They can be changed even if VFs are enabled and running and reflect the
+ settings of all tiles/GTs assigned to the given function.
+
+ exec_quantum_ms: (RW) unsigned integer
+ The GT execution quantum (EQ) in [ms] for the given function.
+ Actual quantum value might be aligned per HW/FW requirements.
+
+ Default is 0 (unlimited).
+
+ preempt_timeout_us: (RW) unsigned integer
+ The GT preemption timeout in [us] of the given function.
+ Actual timeout value might be aligned per HW/FW requirements.
+
+ Default is 0 (unlimited).
+
+ sched_priority: (RW/RO) string
+ The GT scheduling priority of the given function.
+
+ "low" - function will be scheduled on the GPU for its EQ/PT
+ only if function has any work already submitted.
+
+ "normal" - functions will be scheduled on the GPU for its EQ/PT
+ irrespective of whether it has submitted a work or not.
+
+ "high" - function will be scheduled on the GPU for its EQ/PT
+ in the next time-slice after the current one completes
+ and function has a work submitted.
+
+ Default is "low".
+
+ When read, this file will display the current and available
+ scheduling priorities. The currently active priority level will
+ be enclosed in square brackets, like:
+
+ [low] normal high
+
+ This file can be read-only if changing the priority is not
+ supported.
+
+ Writes to these attributes may fail with errors like:
+ -EINVAL if provided input is malformed or not recognized,
+ -EPERM if change is not applicable on given HW/FW,
+ -EIO if FW refuses to change the provisioning.
+
+ Reads from these attributes may fail with:
+ -EUCLEAN if value is not consistent across all tiles/GTs.
+
+
+What: /sys/bus/pci/drivers/xe/.../sriov_admin/.bulk_profile/exec_quantum_ms
+What: /sys/bus/pci/drivers/xe/.../sriov_admin/.bulk_profile/preempt_timeout_us
+What: /sys/bus/pci/drivers/xe/.../sriov_admin/.bulk_profile/sched_priority
+Date: October 2025
+KernelVersion: 6.19
+Contact: intel-xe@lists.freedesktop.org
+Description:
+ These files allows bulk reconfiguration of the scheduling parameters
+ of the PF or VFs and are available only for Intel Xe platforms with
+ GPU sharing based on the time-slice basis. These scheduling parameters
+ can be changed even if VFs are enabled and running.
+
+ exec_quantum_ms: (WO) unsigned integer
+ The GT execution quantum (EQ) in [ms] to be applied to all functions.
+ See sriov_admin/{pf,vf<N>}/profile/exec_quantum_ms for more details.
+
+ preempt_timeout_us: (WO) unsigned integer
+ The GT preemption timeout (PT) in [us] to be applied to all functions.
+ See sriov_admin/{pf,vf<N>}/profile/preempt_timeout_us for more details.
+
+ sched_priority: (RW/RO) string
+ The GT scheduling priority to be applied for all functions.
+ See sriov_admin/{pf,vf<N>}/profile/sched_priority for more details.
+
+ Writes to these attributes may fail with errors like:
+ -EINVAL if provided input is malformed or not recognized,
+ -EPERM if change is not applicable on given HW/FW,
+ -EIO if FW refuses to change the provisioning.
+
+
+What: /sys/bus/pci/drivers/xe/.../sriov_admin/vf<n>/stop
+Date: October 2025
+KernelVersion: 6.19
+Contact: intel-xe@lists.freedesktop.org
+Description:
+ This file allows to control scheduling of the VF on the Intel Xe GPU
+ platforms. It allows to implement custom policy mechanism in case VFs
+ are misbehaving or triggering adverse events above defined thresholds.
+
+ stop: (WO) bool
+ All GT executions of given function shall be immediately stopped.
+ To allow scheduling this VF again, the VF FLR must be triggered.
+
+ Writes to this attribute may fail with errors like:
+ -EINVAL if provided input is malformed or not recognized,
+ -EPERM if change is not applicable on given HW/FW,
+ -EIO if FW refuses to change the scheduling.
+
+
+What: /sys/bus/pci/drivers/xe/.../sriov_admin/pf/device
+What: /sys/bus/pci/drivers/xe/.../sriov_admin/vf<n>/device
+Date: October 2025
+KernelVersion: 6.19
+Contact: intel-xe@lists.freedesktop.org
+Description:
+ These are symlinks to the underlying PCI device entry representing
+ given Xe SR-IOV function. For the PF, this link is always present.
+ For VFs, this link is present only for currently enabled VFs.
diff --git a/Documentation/ABI/testing/sysfs-driver-panfrost-profiling b/Documentation/ABI/testing/sysfs-driver-panfrost-profiling
new file mode 100644
index 000000000000..7597c420e54b
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-panfrost-profiling
@@ -0,0 +1,10 @@
+What: /sys/bus/platform/drivers/panfrost/.../profiling
+Date: February 2024
+KernelVersion: 6.8.0
+Contact: Adrian Larumbe <adrian.larumbe@collabora.com>
+Description:
+ Get/set drm fdinfo's engine and cycles profiling status.
+ Valid values are:
+ 0: Don't enable fdinfo job profiling sources.
+ 1: Enable fdinfo job profiling sources, this enables both the GPU's
+ timestamp and cycle counter registers.
diff --git a/Documentation/ABI/testing/sysfs-driver-panthor-profiling b/Documentation/ABI/testing/sysfs-driver-panthor-profiling
new file mode 100644
index 000000000000..af05fccedc15
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-panthor-profiling
@@ -0,0 +1,10 @@
+What: /sys/bus/platform/drivers/panthor/.../profiling
+Date: September 2024
+KernelVersion: 6.11.0
+Contact: Adrian Larumbe <adrian.larumbe@collabora.com>
+Description:
+ Bitmask to enable drm fdinfo's job profiling measurements.
+ Valid values are:
+ 0: Don't enable fdinfo job profiling sources.
+ 1: Enable GPU cycle measurements for running jobs.
+ 2: Enable GPU timestamp sampling for running jobs.
diff --git a/Documentation/ABI/testing/sysfs-driver-qaic b/Documentation/ABI/testing/sysfs-driver-qaic
new file mode 100644
index 000000000000..f794fd734163
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-qaic
@@ -0,0 +1,18 @@
+What: /sys/bus/pci/drivers/qaic/XXXX:XX:XX.X/ce_count
+Date: May 2025
+KernelVersion: 6.17
+Contact: dri-devel@lists.freedesktop.org
+Description: Number of correctable errors received from device since driver is loaded.
+
+What: /sys/bus/pci/drivers/qaic/XXXX:XX:XX.X/ue_count
+Date: May 2025
+KernelVersion: 6.17
+Contact: dri-devel@lists.freedesktop.org
+Description: Number of uncorrectable errors received from device since driver is loaded.
+
+What: /sys/bus/pci/drivers/qaic/XXXX:XX:XX.X/ue_nonfatal_count
+Date: May 2025
+KernelVersion: 6.17
+Contact: dri-devel@lists.freedesktop.org
+Description: Number of uncorrectable non-fatal errors received from device since driver
+ is loaded.
diff --git a/Documentation/ABI/testing/sysfs-driver-qat b/Documentation/ABI/testing/sysfs-driver-qat
index bbf329cf0d67..b0561b9fc4eb 100644
--- a/Documentation/ABI/testing/sysfs-driver-qat
+++ b/Documentation/ABI/testing/sysfs-driver-qat
@@ -14,7 +14,7 @@ Description: (RW) Reports the current state of the QAT device. Write to
It is possible to transition the device from up to down only
if the device is up and vice versa.
- This attribute is only available for qat_4xxx devices.
+ This attribute is available for qat_4xxx and qat_6xxx devices.
What: /sys/bus/pci/devices/<BDF>/qat/cfg_services
Date: June 2022
@@ -23,24 +23,28 @@ Contact: qat-linux@intel.com
Description: (RW) Reports the current configuration of the QAT device.
Write to the file to change the configured services.
- The values are:
-
- * sym;asym: the device is configured for running crypto
- services
- * asym;sym: identical to sym;asym
- * dc: the device is configured for running compression services
- * dcc: identical to dc but enables the dc chaining feature,
- hash then compression. If this is not required chose dc
- * sym: the device is configured for running symmetric crypto
- services
- * asym: the device is configured for running asymmetric crypto
- services
- * asym;dc: the device is configured for running asymmetric
- crypto services and compression services
- * dc;asym: identical to asym;dc
- * sym;dc: the device is configured for running symmetric crypto
- services and compression services
- * dc;sym: identical to sym;dc
+ One or more services can be enabled per device.
+ Certain configurations are restricted to specific device types;
+ where applicable this is explicitly indicated, for example
+ (qat_6xxx) denotes applicability exclusively to that device series.
+
+ The available services include:
+
+ * sym: Configures the device for symmetric cryptographic operations.
+ * asym: Configures the device for asymmetric cryptographic operations.
+ * dc: Configures the device for compression and decompression
+ operations.
+ * dcc: Similar to dc, but with the additional dc chaining feature
+ enabled, cipher then compress (qat_6xxx), hash then compression.
+ If this is not required choose dc.
+ * decomp: Configures the device for decompression operations (qat_6xxx).
+
+ Service combinations are permitted for all services except dcc.
+ On QAT GEN4 devices (qat_4xxx driver) a maximum of two services can be
+ combined and on QAT GEN6 devices (qat_6xxx driver ) a maximum of three
+ services can be combined.
+ The order of services is not significant. For instance, sym;asym is
+ functionally equivalent to asym;sym.
It is possible to set the configuration only if the device
is in the `down` state (see /sys/bus/pci/devices/<BDF>/qat/state)
@@ -59,7 +63,7 @@ Description: (RW) Reports the current configuration of the QAT device.
# cat /sys/bus/pci/devices/<BDF>/qat/cfg_services
dc
- This attribute is only available for qat_4xxx devices.
+ This attribute is available for qat_4xxx and qat_6xxx devices.
What: /sys/bus/pci/devices/<BDF>/qat/pm_idle_enabled
Date: June 2023
@@ -94,7 +98,7 @@ Description: (RW) This configuration option provides a way to force the device i
# cat /sys/bus/pci/devices/<BDF>/qat/pm_idle_enabled
0
- This attribute is only available for qat_4xxx devices.
+ This attribute is available for qat_4xxx and qat_6xxx devices.
What: /sys/bus/pci/devices/<BDF>/qat/rp2srv
Date: January 2024
@@ -126,7 +130,7 @@ Description:
# cat /sys/bus/pci/devices/<BDF>/qat/rp2srv
sym
- This attribute is only available for qat_4xxx devices.
+ This attribute is available for qat_4xxx and qat_6xxx devices.
What: /sys/bus/pci/devices/<BDF>/qat/num_rps
Date: January 2024
@@ -140,4 +144,24 @@ Description:
# cat /sys/bus/pci/devices/<BDF>/qat/num_rps
64
- This attribute is only available for qat_4xxx devices.
+ This attribute is available for qat_4xxx and qat_6xxx devices.
+
+What: /sys/bus/pci/devices/<BDF>/qat/auto_reset
+Date: May 2024
+KernelVersion: 6.9
+Contact: qat-linux@intel.com
+Description: (RW) Reports the current state of the autoreset feature
+ for a QAT device
+
+ Write to the attribute to enable or disable device auto reset.
+
+ Device auto reset is disabled by default.
+
+ The values are:
+
+ * 1/Yy/on: auto reset enabled. If the device encounters an
+ unrecoverable error, it will be reset automatically.
+ * 0/Nn/off: auto reset disabled. If the device encounters an
+ unrecoverable error, it will not be reset.
+
+ This attribute is available for qat_4xxx and qat_6xxx devices.
diff --git a/Documentation/ABI/testing/sysfs-driver-qat_ras b/Documentation/ABI/testing/sysfs-driver-qat_ras
index 176dea1e9c0a..82ceb04445ec 100644
--- a/Documentation/ABI/testing/sysfs-driver-qat_ras
+++ b/Documentation/ABI/testing/sysfs-driver-qat_ras
@@ -4,7 +4,7 @@ KernelVersion: 6.7
Contact: qat-linux@intel.com
Description: (RO) Reports the number of correctable errors detected by the device.
- This attribute is only available for qat_4xxx devices.
+ This attribute is only available for qat_4xxx and qat_6xxx devices.
What: /sys/bus/pci/devices/<BDF>/qat_ras/errors_nonfatal
Date: January 2024
@@ -12,7 +12,7 @@ KernelVersion: 6.7
Contact: qat-linux@intel.com
Description: (RO) Reports the number of non fatal errors detected by the device.
- This attribute is only available for qat_4xxx devices.
+ This attribute is only available for qat_4xxx and qat_6xxx devices.
What: /sys/bus/pci/devices/<BDF>/qat_ras/errors_fatal
Date: January 2024
@@ -20,7 +20,7 @@ KernelVersion: 6.7
Contact: qat-linux@intel.com
Description: (RO) Reports the number of fatal errors detected by the device.
- This attribute is only available for qat_4xxx devices.
+ This attribute is only available for qat_4xxx and qat_6xxx devices.
What: /sys/bus/pci/devices/<BDF>/qat_ras/reset_error_counters
Date: January 2024
@@ -38,4 +38,4 @@ Description: (WO) Write to resets all error counters of a device.
# cat /sys/bus/pci/devices/<BDF>/qat_ras/errors_fatal
0
- This attribute is only available for qat_4xxx devices.
+ This attribute is only available for qat_4xxx and qat_6xxx devices.
diff --git a/Documentation/ABI/testing/sysfs-driver-qat_rl b/Documentation/ABI/testing/sysfs-driver-qat_rl
index 8c282ae3155d..d534f89b4971 100644
--- a/Documentation/ABI/testing/sysfs-driver-qat_rl
+++ b/Documentation/ABI/testing/sysfs-driver-qat_rl
@@ -31,7 +31,7 @@ Description:
* rm_all: Removes all the configured SLAs.
* Inputs: None
- This attribute is only available for qat_4xxx devices.
+ This attribute is only available for qat_4xxx and qat_6xxx devices.
What: /sys/bus/pci/devices/<BDF>/qat_rl/rp
Date: January 2024
@@ -68,7 +68,7 @@ Description:
## Write
# echo 0x5 > /sys/bus/pci/devices/<BDF>/qat_rl/rp
- This attribute is only available for qat_4xxx devices.
+ This attribute is only available for qat_4xxx and qat_6xxx devices.
What: /sys/bus/pci/devices/<BDF>/qat_rl/id
Date: January 2024
@@ -101,7 +101,7 @@ Description:
# cat /sys/bus/pci/devices/<BDF>/qat_rl/rp
0x5 ## ring pair ID 0 and ring pair ID 2
- This attribute is only available for qat_4xxx devices.
+ This attribute is only available for qat_4xxx and qat_6xxx devices.
What: /sys/bus/pci/devices/<BDF>/qat_rl/cir
Date: January 2024
@@ -135,7 +135,7 @@ Description:
# cat /sys/bus/pci/devices/<BDF>/qat_rl/cir
500
- This attribute is only available for qat_4xxx devices.
+ This attribute is only available for qat_4xxx and qat_6xxx devices.
What: /sys/bus/pci/devices/<BDF>/qat_rl/pir
Date: January 2024
@@ -169,7 +169,7 @@ Description:
# cat /sys/bus/pci/devices/<BDF>/qat_rl/pir
750
- This attribute is only available for qat_4xxx devices.
+ This attribute is only available for qat_4xxx and qat_6xxx devices.
What: /sys/bus/pci/devices/<BDF>/qat_rl/srv
Date: January 2024
@@ -202,7 +202,7 @@ Description:
# cat /sys/bus/pci/devices/<BDF>/qat_rl/srv
dc
- This attribute is only available for qat_4xxx devices.
+ This attribute is only available for qat_4xxx and qat_6xxx devices.
What: /sys/bus/pci/devices/<BDF>/qat_rl/cap_rem
Date: January 2024
@@ -223,4 +223,4 @@ Description:
# cat /sys/bus/pci/devices/<BDF>/qat_rl/cap_rem
0
- This attribute is only available for qat_4xxx devices.
+ This attribute is only available for qat_4xxx and qat_6xxx devices.
diff --git a/Documentation/ABI/testing/sysfs-driver-samsung-laptop b/Documentation/ABI/testing/sysfs-driver-samsung-laptop
index 28c9c040de5d..408cb0ddf4aa 100644
--- a/Documentation/ABI/testing/sysfs-driver-samsung-laptop
+++ b/Documentation/ABI/testing/sysfs-driver-samsung-laptop
@@ -20,17 +20,6 @@ Description: Some Samsung laptops have different "performance levels"
and it's still unknown if this value even changes
anything, other than making the user feel a bit better.
-What: /sys/devices/platform/samsung/battery_life_extender
-Date: December 1, 2011
-KernelVersion: 3.3
-Contact: Corentin Chary <corentin.chary@gmail.com>
-Description: Max battery charge level can be modified, battery cycle
- life can be extended by reducing the max battery charge
- level.
-
- - 0 means normal battery mode (100% charge)
- - 1 means battery life extender mode (80% charge)
-
What: /sys/devices/platform/samsung/usb_charge
Date: December 1, 2011
KernelVersion: 3.3
diff --git a/Documentation/ABI/testing/sysfs-driver-spi-intel b/Documentation/ABI/testing/sysfs-driver-spi-intel
new file mode 100644
index 000000000000..d7c9139ddbf3
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-spi-intel
@@ -0,0 +1,20 @@
+What: /sys/devices/.../intel_spi_protected
+Date: Feb 2025
+KernelVersion: 6.13
+Contact: Alexander Usyskin <alexander.usyskin@intel.com>
+Description: This attribute allows the userspace to check if the
+ Intel SPI flash controller is write protected from the host.
+
+What: /sys/devices/.../intel_spi_locked
+Date: Feb 2025
+KernelVersion: 6.13
+Contact: Alexander Usyskin <alexander.usyskin@intel.com>
+Description: This attribute allows the user space to check if the
+ Intel SPI flash controller locks supported opcodes.
+
+What: /sys/devices/.../intel_spi_bios_locked
+Date: Feb 2025
+KernelVersion: 6.13
+Contact: Alexander Usyskin <alexander.usyskin@intel.com>
+Description: This attribute allows the user space to check if the
+ Intel SPI flash controller BIOS region is locked for writes.
diff --git a/Documentation/ABI/testing/sysfs-driver-typec-displayport b/Documentation/ABI/testing/sysfs-driver-typec-displayport
index 256c87c5219a..314acd54e13e 100644
--- a/Documentation/ABI/testing/sysfs-driver-typec-displayport
+++ b/Documentation/ABI/testing/sysfs-driver-typec-displayport
@@ -62,3 +62,13 @@ Description:
by VESA DisplayPort Alt Mode on USB Type-C Standard.
- 0 when HPD’s logical state is low (HPD_Low) as defined by
VESA DisplayPort Alt Mode on USB Type-C Standard.
+
+What: /sys/bus/typec/devices/.../displayport/irq_hpd
+Date: June 2025
+Contact: RD Babiera <rdbabiera@google.com>
+Description:
+ IRQ_HPD events are sent over the USB PD protocol in Status Update and
+ Attention messages. IRQ_HPD can only be asserted when HPD is high,
+ and is asserted when an IRQ_HPD has been issued since the last Status
+ Update. This is a read only node that returns the number of IRQ events
+ raised in the driver's lifetime.
diff --git a/Documentation/ABI/testing/sysfs-driver-ufs b/Documentation/ABI/testing/sysfs-driver-ufs
index 5bf7073b4f75..a90612ab5780 100644
--- a/Documentation/ABI/testing/sysfs-driver-ufs
+++ b/Documentation/ABI/testing/sysfs-driver-ufs
@@ -711,7 +711,7 @@ Description: This file shows the thin provisioning type. This is one of
The file is read only.
-What: /sys/class/scsi_device/*/device/unit_descriptor/physical_memory_resourse_count
+What: /sys/class/scsi_device/*/device/unit_descriptor/physical_memory_resource_count
Date: February 2018
Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
Description: This file shows the total physical memory resources. This is
@@ -920,14 +920,16 @@ Description: This file shows whether the configuration descriptor is locked.
What: /sys/bus/platform/drivers/ufshcd/*/attributes/max_number_of_rtt
What: /sys/bus/platform/devices/*.ufs/attributes/max_number_of_rtt
-Date: February 2018
-Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
+Date: May 2024
+Contact: Avri Altman <avri.altman@wdc.com>
Description: This file provides the maximum current number of
- outstanding RTTs in device that is allowed. The full
- information about the attribute could be found at
- UFS specifications 2.1.
+ outstanding RTTs in device that is allowed. bMaxNumOfRTT is a
+ read-write persistent attribute and is equal to two after device
+ manufacturing. It shall not be set to a value greater than
+ bDeviceRTTCap value, and it may be set only when the hw queues are
+ empty.
- The file is read only.
+ The file is read write.
What: /sys/bus/platform/drivers/ufshcd/*/attributes/exception_event_control
What: /sys/bus/platform/devices/*.ufs/attributes/exception_event_control
@@ -1530,3 +1532,239 @@ Contact: Bean Huo <beanhuo@micron.com>
Description:
rtc_update_ms indicates how often the host should synchronize or update the
UFS RTC. If set to 0, this will disable UFS RTC periodic update.
+
+What: /sys/devices/platform/.../ufshci_capabilities/version
+Date: August 2024
+Contact: Avri Altman <avri.altman@wdc.com>
+Description:
+ Host Capabilities register group: UFS version register.
+ Symbol - VER. This file shows the UFSHCD version.
+ Example: Version 3.12 would be represented as 0000_0312h.
+ The file is read only.
+
+What: /sys/devices/platform/.../ufshci_capabilities/product_id
+Date: August 2024
+Contact: Avri Altman <avri.altman@wdc.com>
+Description:
+ Host Capabilities register group: product ID register.
+ Symbol - HCPID. This file shows the UFSHCD product id.
+ The content of this register is vendor specific.
+ The file is read only.
+
+What: /sys/devices/platform/.../ufshci_capabilities/man_id
+Date: August 2024
+Contact: Avri Altman <avri.altman@wdc.com>
+Description:
+ Host Capabilities register group: manufacturer ID register.
+ Symbol - HCMID. This file shows the UFSHCD manufacturer id.
+ The Manufacturer ID is defined by JEDEC in JEDEC-JEP106.
+ The file is read only.
+
+What: /sys/bus/platform/drivers/ufshcd/*/critical_health
+What: /sys/bus/platform/devices/*.ufs/critical_health
+Date: February 2025
+Contact: Avri Altman <avri.altman@wdc.com>
+Description: Report the number of times a critical health event has been
+ reported by a UFS device. Further insight into the specific
+ issue can be gained by reading one of: bPreEOLInfo,
+ bDeviceLifeTimeEstA, bDeviceLifeTimeEstB,
+ bWriteBoosterBufferLifeTimeEst, and bRPMBLifeTimeEst.
+
+ The file is read only.
+
+What: /sys/bus/platform/drivers/ufshcd/*/clkscale_enable
+What: /sys/bus/platform/devices/*.ufs/clkscale_enable
+Date: January 2025
+Contact: Ziqi Chen <quic_ziqichen@quicinc.com>
+Description:
+ This attribute shows whether the UFS clock scaling is enabled or not.
+ And it can be used to enable/disable the clock scaling by writing
+ 1 or 0 to this attribute.
+
+ The attribute is read/write.
+
+What: /sys/bus/platform/drivers/ufshcd/*/clkgate_enable
+What: /sys/bus/platform/devices/*.ufs/clkgate_enable
+Date: January 2025
+Contact: Ziqi Chen <quic_ziqichen@quicinc.com>
+Description:
+ This attribute shows whether the UFS clock gating is enabled or not.
+ And it can be used to enable/disable the clock gating by writing
+ 1 or 0 to this attribute.
+
+ The attribute is read/write.
+
+What: /sys/bus/platform/drivers/ufshcd/*/clkgate_delay_ms
+What: /sys/bus/platform/devices/*.ufs/clkgate_delay_ms
+Date: January 2025
+Contact: Ziqi Chen <quic_ziqichen@quicinc.com>
+Description:
+ This attribute shows and sets the number of milliseconds of idle time
+ before the UFS driver starts to perform clock gating. This can
+ prevent the UFS from frequently performing clock gating/ungating.
+
+ The attribute is read/write.
+
+What: /sys/bus/platform/drivers/ufshcd/*/device_lvl_exception_count
+What: /sys/bus/platform/devices/*.ufs/device_lvl_exception_count
+Date: March 2025
+Contact: Bao D. Nguyen <quic_nguyenb@quicinc.com>
+Description:
+ This attribute is applicable to ufs devices compliant to the
+ JEDEC specifications version 4.1 or later. The
+ device_lvl_exception_count is a counter indicating the number of
+ times the device level exceptions have occurred since the last
+ time this variable is reset. Writing a 0 value to this
+ attribute will reset the device_lvl_exception_count. If the
+ device_lvl_exception_count reads a positive value, the user
+ application should read the device_lvl_exception_id attribute to
+ know more information about the exception.
+
+ The attribute is read/write.
+
+What: /sys/bus/platform/drivers/ufshcd/*/device_lvl_exception_id
+What: /sys/bus/platform/devices/*.ufs/device_lvl_exception_id
+Date: March 2025
+Contact: Bao D. Nguyen <quic_nguyenb@quicinc.com>
+Description:
+ Reading the device_lvl_exception_id returns the
+ qDeviceLevelExceptionID attribute of the ufs device JEDEC
+ specification version 4.1. The definition of the
+ qDeviceLevelExceptionID is the ufs device vendor specific
+ implementation. Refer to the device manufacturer datasheet for
+ more information on the meaning of the qDeviceLevelExceptionID
+ attribute value.
+
+ The attribute is read only.
+
+What: /sys/bus/platform/drivers/ufshcd/*/wb_resize_enable
+What: /sys/bus/platform/devices/*.ufs/wb_resize_enable
+Date: April 2025
+Contact: Huan Tang <tanghuan@vivo.com>
+Description:
+ The host can enable the WriteBooster buffer resize by setting this
+ attribute.
+
+ ======== ======================================
+ idle There is no resize operation
+ decrease Decrease WriteBooster buffer size
+ increase Increase WriteBooster buffer size
+ ======== ======================================
+
+ The file is write only.
+
+What: /sys/bus/platform/drivers/ufshcd/*/attributes/wb_resize_hint
+What: /sys/bus/platform/devices/*.ufs/attributes/wb_resize_hint
+Date: April 2025
+Contact: Huan Tang <tanghuan@vivo.com>
+Description:
+ wb_resize_hint indicates hint information about which type of resize
+ for WriteBooster buffer is recommended by the device.
+
+ ========= ======================================
+ keep Recommend keep the buffer size
+ decrease Recommend to decrease the buffer size
+ increase Recommend to increase the buffer size
+ ========= ======================================
+
+ The file is read only.
+
+What: /sys/bus/platform/drivers/ufshcd/*/attributes/wb_resize_status
+What: /sys/bus/platform/devices/*.ufs/attributes/wb_resize_status
+Date: April 2025
+Contact: Huan Tang <tanghuan@vivo.com>
+Description:
+ The host can check the resize operation status of the WriteBooster
+ buffer by reading this attribute.
+
+ ================ ========================================
+ idle Resize operation is not issued
+ in_progress Resize operation in progress
+ complete_success Resize operation completed successfully
+ general_failure Resize operation general failure
+ ================ ========================================
+
+ The file is read only.
+
+What: /sys/bus/platform/drivers/ufshcd/*/hid/analysis_trigger
+What: /sys/bus/platform/devices/*.ufs/hid/analysis_trigger
+Date: May 2025
+Contact: Huan Tang <tanghuan@vivo.com>
+Description:
+ The host can enable or disable HID analysis operation.
+
+ ======= =========================================
+ disable disable HID analysis operation
+ enable enable HID analysis operation
+ ======= =========================================
+
+ The file is write only.
+
+What: /sys/bus/platform/drivers/ufshcd/*/hid/defrag_trigger
+What: /sys/bus/platform/devices/*.ufs/hid/defrag_trigger
+Date: May 2025
+Contact: Huan Tang <tanghuan@vivo.com>
+Description:
+ The host can enable or disable HID defragmentation operation.
+
+ ======= =========================================
+ disable disable HID defragmentation operation
+ enable enable HID defragmentation operation
+ ======= =========================================
+
+ The attribute is write only.
+
+What: /sys/bus/platform/drivers/ufshcd/*/hid/fragmented_size
+What: /sys/bus/platform/devices/*.ufs/hid/fragmented_size
+Date: May 2025
+Contact: Huan Tang <tanghuan@vivo.com>
+Description:
+ The total fragmented size in the device is reported through
+ this attribute.
+
+ The attribute is read only.
+
+What: /sys/bus/platform/drivers/ufshcd/*/hid/defrag_size
+What: /sys/bus/platform/devices/*.ufs/hid/defrag_size
+Date: May 2025
+Contact: Huan Tang <tanghuan@vivo.com>
+Description:
+ The host sets the size to be defragmented by an HID
+ defragmentation operation.
+
+ The attribute is read/write.
+
+What: /sys/bus/platform/drivers/ufshcd/*/hid/progress_ratio
+What: /sys/bus/platform/devices/*.ufs/hid/progress_ratio
+Date: May 2025
+Contact: Huan Tang <tanghuan@vivo.com>
+Description:
+ Defragmentation progress is reported by this attribute,
+ indicates the ratio of the completed defragmentation size
+ over the requested defragmentation size.
+
+ ==== ============================================
+ 1 1%
+ ...
+ 100 100%
+ ==== ============================================
+
+ The attribute is read only.
+
+What: /sys/bus/platform/drivers/ufshcd/*/hid/state
+What: /sys/bus/platform/devices/*.ufs/hid/state
+Date: May 2025
+Contact: Huan Tang <tanghuan@vivo.com>
+Description:
+ The HID state is reported by this attribute.
+
+ ==================== ===========================
+ idle Idle (analysis required)
+ analysis_in_progress Analysis in progress
+ defrag_required Defrag required
+ defrag_in_progress Defrag in progress
+ defrag_completed Defrag completed
+ defrag_not_required Defrag is not required
+ ==================== ===========================
+
+ The attribute is read only.
diff --git a/Documentation/ABI/testing/sysfs-driver-uio_pci_sva-pasid b/Documentation/ABI/testing/sysfs-driver-uio_pci_sva-pasid
new file mode 100644
index 000000000000..6892fe46cea8
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-uio_pci_sva-pasid
@@ -0,0 +1,29 @@
+What: /sys/bus/pci/drivers/uio_pci_sva/<pci_dev>/pasid
+Date: September 2025
+Contact: Yaxing Guo <guoyaxing@bosc.ac.cn>
+Description:
+ Process Address Space ID (PASID) assigned by IOMMU driver to
+ the device for use with Shared Virtual Addressing (SVA).
+
+ This read-only attribute exposes the PASID (A 20-bit identifier
+ used in PCIe Address Translation Services and iommu table walks)
+ allocated by the IOMMU driver during sva device binding.
+
+ User-space UIO applications must read this attribute to obtain
+ the PASID and program it into the device's configuration registers.
+ This enables the device to perform DMA using user-space virtual
+ address, with address translation handled by IOMMU.
+
+ UIO User-space applications must:
+ - Opening device and Mapping the device's register space via /dev/uioX
+ (This triggers the IOMMU driver to allocate the PASID)
+ - Reading the PASID from sysfs
+ - Writing the PASID to a device-specific register (with example offset)
+ The code may be like:
+
+ map = mmap(..., "/dev/uio0", ...);
+
+ f = fopen("/sys/.../pasid", "r");
+ fscanf(f, "%d", &pasid);
+
+ map[REG_PASID_OFFSET] = pasid;
diff --git a/Documentation/ABI/testing/sysfs-driver-uniwill-laptop b/Documentation/ABI/testing/sysfs-driver-uniwill-laptop
new file mode 100644
index 000000000000..eaeb659793d2
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-uniwill-laptop
@@ -0,0 +1,53 @@
+What: /sys/bus/platform/devices/INOU0000:XX/fn_lock_toggle_enable
+Date: November 2025
+KernelVersion: 6.19
+Contact: Armin Wolf <W_Armin@gmx.de>
+Description:
+ Allows userspace applications to enable/disable the FN lock feature
+ of the integrated keyboard by writing "1"/"0" into this file.
+
+ Reading this file returns the current enable status of the FN lock functionality.
+
+What: /sys/bus/platform/devices/INOU0000:XX/super_key_toggle_enable
+Date: November 2025
+KernelVersion: 6.19
+Contact: Armin Wolf <W_Armin@gmx.de>
+Description:
+ Allows userspace applications to enable/disable the super key functionality
+ of the integrated keyboard by writing "1"/"0" into this file.
+
+ Reading this file returns the current enable status of the super key functionality.
+
+What: /sys/bus/platform/devices/INOU0000:XX/touchpad_toggle_enable
+Date: November 2025
+KernelVersion: 6.19
+Contact: Armin Wolf <W_Armin@gmx.de>
+Description:
+ Allows userspace applications to enable/disable the touchpad toggle functionality
+ of the integrated touchpad by writing "1"/"0" into this file.
+
+ Reading this file returns the current enable status of the touchpad toggle
+ functionality.
+
+What: /sys/bus/platform/devices/INOU0000:XX/rainbow_animation
+Date: November 2025
+KernelVersion: 6.19
+Contact: Armin Wolf <W_Armin@gmx.de>
+Description:
+ Forces the integrated lightbar to display a rainbow animation when the machine
+ is not suspended. Writing "1"/"0" into this file enables/disables this
+ functionality.
+
+ Reading this file returns the current status of the rainbow animation functionality.
+
+What: /sys/bus/platform/devices/INOU0000:XX/breathing_in_suspend
+Date: November 2025
+KernelVersion: 6.19
+Contact: Armin Wolf <W_Armin@gmx.de>
+Description:
+ Causes the integrated lightbar to display a breathing animation when the machine
+ has been suspended and is running on AC power. Writing "1"/"0" into this file
+ enables/disables this functionality.
+
+ Reading this file returns the current status of the breathing animation
+ functionality.
diff --git a/Documentation/ABI/testing/sysfs-edac-ecs b/Documentation/ABI/testing/sysfs-edac-ecs
new file mode 100644
index 000000000000..87c885c4eb1a
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-edac-ecs
@@ -0,0 +1,74 @@
+What: /sys/bus/edac/devices/<dev-name>/ecs_fruX
+Date: March 2025
+KernelVersion: 6.15
+Contact: linux-edac@vger.kernel.org
+Description:
+ The sysfs EDAC bus devices /<dev-name>/ecs_fruX subdirectory
+ pertains to the memory media ECS (Error Check Scrub) control
+ feature, where <dev-name> directory corresponds to a device
+ registered with the EDAC device driver for the ECS feature.
+ /ecs_fruX belongs to the media FRUs (Field Replaceable Unit)
+ under the memory device.
+
+ The sysfs ECS attr nodes are only present if the parent
+ driver has implemented the corresponding attr callback
+ function and provided the necessary operations to the EDAC
+ device driver during registration.
+
+What: /sys/bus/edac/devices/<dev-name>/ecs_fruX/log_entry_type
+Date: March 2025
+KernelVersion: 6.15
+Contact: linux-edac@vger.kernel.org
+Description:
+ (RW) The log entry type of how the DDR5 ECS log is reported.
+
+ - 0 - per DRAM.
+
+ - 1 - per memory media FRU.
+
+ - All other values are reserved.
+
+What: /sys/bus/edac/devices/<dev-name>/ecs_fruX/mode
+Date: March 2025
+KernelVersion: 6.15
+Contact: linux-edac@vger.kernel.org
+Description:
+ (RW) The mode of how the DDR5 ECS counts the errors.
+ Error count is tracked based on two different modes
+ selected by DDR5 ECS Control Feature - Codeword mode and
+ Row Count mode. If the ECS is under Codeword mode, then
+ the error count increments each time a codeword with check
+ bit errors is detected. If the ECS is under Row Count mode,
+ then the error counter increments each time a row with
+ check bit errors is detected.
+
+ - 0 - ECS counts rows in the memory media that have ECC errors.
+
+ - 1 - ECS counts codewords with errors, specifically, it counts
+ the number of ECC-detected errors in the memory media.
+
+ - All other values are reserved.
+
+What: /sys/bus/edac/devices/<dev-name>/ecs_fruX/reset
+Date: March 2025
+KernelVersion: 6.15
+Contact: linux-edac@vger.kernel.org
+Description:
+ (WO) ECS reset ECC counter.
+
+ - 1 - reset ECC counter to the default value.
+
+ - All other values are reserved.
+
+What: /sys/bus/edac/devices/<dev-name>/ecs_fruX/threshold
+Date: March 2025
+KernelVersion: 6.15
+Contact: linux-edac@vger.kernel.org
+Description:
+ (RW) DDR5 ECS threshold count per gigabits of memory cells.
+ The ECS error count is subject to the ECS Threshold count
+ per Gbit, which masks error counts less than the Threshold.
+
+ Supported values are 256, 1024 and 4096.
+
+ All other values are reserved.
diff --git a/Documentation/ABI/testing/sysfs-edac-memory-repair b/Documentation/ABI/testing/sysfs-edac-memory-repair
new file mode 100644
index 000000000000..0434a3b23ff3
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-edac-memory-repair
@@ -0,0 +1,206 @@
+What: /sys/bus/edac/devices/<dev-name>/mem_repairX
+Date: March 2025
+KernelVersion: 6.15
+Contact: linux-edac@vger.kernel.org
+Description:
+ The sysfs EDAC bus devices /<dev-name>/mem_repairX subdirectory
+ pertains to the memory media repair features control, such as
+ PPR (Post Package Repair), memory sparing etc, where <dev-name>
+ directory corresponds to a device registered with the EDAC
+ device driver for the memory repair features.
+
+ Post Package Repair is a maintenance operation requests the memory
+ device to perform a repair operation on its media. It is a memory
+ self-healing feature that fixes a failing memory location by
+ replacing it with a spare row in a DRAM device. For example, a
+ CXL memory device with DRAM components that support PPR features may
+ implement PPR maintenance operations. DRAM components may support
+ two types of PPR functions: hard PPR, for a permanent row repair, and
+ soft PPR, for a temporary row repair. Soft PPR may be much faster
+ than hard PPR, but the repair is lost with a power cycle.
+
+ The sysfs attributes nodes for a repair feature are only
+ present if the parent driver has implemented the corresponding
+ attr callback function and provided the necessary operations
+ to the EDAC device driver during registration.
+
+ In some states of system configuration (e.g. before address
+ decoders have been configured), memory devices (e.g. CXL)
+ may not have an active mapping in the main host address
+ physical address map. As such, the memory to repair must be
+ identified by a device specific physical addressing scheme
+ using a device physical address(DPA). The DPA and other control
+ attributes to use will be presented in related error records.
+
+What: /sys/bus/edac/devices/<dev-name>/mem_repairX/repair_type
+Date: March 2025
+KernelVersion: 6.15
+Contact: linux-edac@vger.kernel.org
+Description:
+ (RO) Memory repair type. For eg. post package repair,
+ memory sparing etc. Valid values are:
+
+ - ppr - Post package repair.
+
+ - cacheline-sparing
+
+ - row-sparing
+
+ - bank-sparing
+
+ - rank-sparing
+
+ - All other values are reserved.
+
+What: /sys/bus/edac/devices/<dev-name>/mem_repairX/persist_mode
+Date: March 2025
+KernelVersion: 6.15
+Contact: linux-edac@vger.kernel.org
+Description:
+ (RW) Get/Set the current persist repair mode set for a
+ repair function. Persist repair modes supported in the
+ device, based on a memory repair function, either is temporary,
+ which is lost with a power cycle or permanent. Valid values are:
+
+ - 0 - Soft memory repair (temporary repair).
+
+ - 1 - Hard memory repair (permanent repair).
+
+ - All other values are reserved.
+
+What: /sys/bus/edac/devices/<dev-name>/mem_repairX/repair_safe_when_in_use
+Date: March 2025
+KernelVersion: 6.15
+Contact: linux-edac@vger.kernel.org
+Description:
+ (RO) True if memory media is accessible and data is retained
+ during the memory repair operation.
+ The data may not be retained and memory requests may not be
+ correctly processed during a repair operation. In such case
+ repair operation can not be executed at runtime. The memory
+ must be taken offline.
+
+What: /sys/bus/edac/devices/<dev-name>/mem_repairX/hpa
+Date: March 2025
+KernelVersion: 6.15
+Contact: linux-edac@vger.kernel.org
+Description:
+ (RW) Host Physical Address (HPA) of the memory to repair.
+ The HPA to use will be provided in related error records.
+
+What: /sys/bus/edac/devices/<dev-name>/mem_repairX/dpa
+Date: March 2025
+KernelVersion: 6.15
+Contact: linux-edac@vger.kernel.org
+Description:
+ (RW) Device Physical Address (DPA) of the memory to repair.
+ The specific DPA to use will be provided in related error
+ records.
+
+ In some states of system configuration (e.g. before address
+ decoders have been configured), memory devices (e.g. CXL)
+ may not have an active mapping in the main host address
+ physical address map. As such, the memory to repair must be
+ identified by a device specific physical addressing scheme
+ using a DPA. The device physical address(DPA) to use will be
+ presented in related error records.
+
+What: /sys/bus/edac/devices/<dev-name>/mem_repairX/nibble_mask
+Date: March 2025
+KernelVersion: 6.15
+Contact: linux-edac@vger.kernel.org
+Description:
+ (RW) Read/Write Nibble mask of the memory to repair.
+ Nibble mask identifies one or more nibbles in error on the
+ memory bus that produced the error event. Nibble Mask bit 0
+ shall be set if nibble 0 on the memory bus produced the
+ event, etc. For example, CXL PPR and sparing, a nibble mask
+ bit set to 1 indicates the request to perform repair
+ operation in the specific device. All nibble mask bits set
+ to 1 indicates the request to perform the operation in all
+ devices. Eg. for CXL memory repair, the specific value of
+ nibble mask to use will be provided in related error records.
+ For more details, See nibble mask field in CXL spec ver 3.1,
+ section 8.2.9.7.1.2 Table 8-103 soft PPR and section
+ 8.2.9.7.1.3 Table 8-104 hard PPR, section 8.2.9.7.1.4
+ Table 8-105 memory sparing.
+
+What: /sys/bus/edac/devices/<dev-name>/mem_repairX/min_hpa
+What: /sys/bus/edac/devices/<dev-name>/mem_repairX/max_hpa
+What: /sys/bus/edac/devices/<dev-name>/mem_repairX/min_dpa
+What: /sys/bus/edac/devices/<dev-name>/mem_repairX/max_dpa
+Date: March 2025
+KernelVersion: 6.15
+Contact: linux-edac@vger.kernel.org
+Description:
+ (RW) The supported range of memory address that is to be
+ repaired. The memory device may give the supported range of
+ attributes to use and it will depend on the memory device
+ and the portion of memory to repair.
+ The userspace may receive the specific value of attributes
+ to use for a repair operation from the memory device via
+ related error records and trace events, for eg. CXL DRAM
+ and CXL general media error records in CXL memory devices.
+
+What: /sys/bus/edac/devices/<dev-name>/mem_repairX/bank_group
+What: /sys/bus/edac/devices/<dev-name>/mem_repairX/bank
+What: /sys/bus/edac/devices/<dev-name>/mem_repairX/rank
+What: /sys/bus/edac/devices/<dev-name>/mem_repairX/row
+What: /sys/bus/edac/devices/<dev-name>/mem_repairX/column
+What: /sys/bus/edac/devices/<dev-name>/mem_repairX/channel
+What: /sys/bus/edac/devices/<dev-name>/mem_repairX/sub_channel
+Date: March 2025
+KernelVersion: 6.15
+Contact: linux-edac@vger.kernel.org
+Description:
+ (RW) The control attributes for the memory to be repaired.
+ The specific value of attributes to use depends on the
+ portion of memory to repair and will be reported to the host
+ in related error records and be available to userspace
+ in trace events, such as CXL DRAM and CXL general media
+ error records of CXL memory devices.
+
+ When readng back these attributes, it returns the current
+ value of memory requested to be repaired.
+
+ bank_group - The bank group of the memory to repair.
+
+ bank - The bank number of the memory to repair.
+
+ rank - The rank of the memory to repair. Rank is defined as a
+ set of memory devices on a channel that together execute a
+ transaction.
+
+ row - The row number of the memory to repair.
+
+ column - The column number of the memory to repair.
+
+ channel - The channel of the memory to repair. Channel is
+ defined as an interface that can be independently accessed
+ for a transaction.
+
+ sub_channel - The subchannel of the memory to repair.
+
+ The requirement to set these attributes varies based on the
+ repair function. The attributes in sysfs are not present
+ unless required for a repair function.
+
+ For example, CXL spec ver 3.1, Section 8.2.9.7.1.2 Table 8-103
+ soft PPR and Section 8.2.9.7.1.3 Table 8-104 hard PPR operations,
+ these attributes are not required to set. CXL spec ver 3.1,
+ Section 8.2.9.7.1.4 Table 8-105 memory sparing, these attributes
+ are required to set based on memory sparing granularity.
+
+What: /sys/bus/edac/devices/<dev-name>/mem_repairX/repair
+Date: March 2025
+KernelVersion: 6.15
+Contact: linux-edac@vger.kernel.org
+Description:
+ (WO) Issue the memory repair operation for the specified
+ memory repair attributes. The operation may fail if resources
+ are insufficient based on the requirements of the memory
+ device and repair function.
+
+ - 1 - Issue the repair operation.
+
+ - All other values are reserved.
diff --git a/Documentation/ABI/testing/sysfs-edac-scrub b/Documentation/ABI/testing/sysfs-edac-scrub
new file mode 100644
index 000000000000..ab6014743da5
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-edac-scrub
@@ -0,0 +1,85 @@
+What: /sys/bus/edac/devices/<dev-name>/scrubX
+Date: March 2025
+KernelVersion: 6.15
+Contact: linux-edac@vger.kernel.org
+Description:
+ The sysfs EDAC bus devices /<dev-name>/scrubX subdirectory
+ belongs to an instance of memory scrub control feature,
+ where <dev-name> directory corresponds to a device/memory
+ region registered with the EDAC device driver for the
+ scrub control feature.
+
+ The sysfs scrub attr nodes are only present if the parent
+ driver has implemented the corresponding attr callback
+ function and provided the necessary operations to the EDAC
+ device driver during registration.
+
+What: /sys/bus/edac/devices/<dev-name>/scrubX/addr
+Date: March 2025
+KernelVersion: 6.15
+Contact: linux-edac@vger.kernel.org
+Description:
+ (RW) The base address of the memory region to be scrubbed
+ for on-demand scrubbing. Setting address starts scrubbing.
+ The size must be set before that.
+
+ The readback addr value is non-zero if the requested
+ on-demand scrubbing is in progress, zero otherwise.
+
+What: /sys/bus/edac/devices/<dev-name>/scrubX/size
+Date: March 2025
+KernelVersion: 6.15
+Contact: linux-edac@vger.kernel.org
+Description:
+ (RW) The size of the memory region to be scrubbed
+ (on-demand scrubbing).
+
+What: /sys/bus/edac/devices/<dev-name>/scrubX/enable_background
+Date: March 2025
+KernelVersion: 6.15
+Contact: linux-edac@vger.kernel.org
+Description:
+ (RW) Start/Stop background (patrol) scrubbing if supported.
+
+What: /sys/bus/edac/devices/<dev-name>/scrubX/min_cycle_duration
+Date: March 2025
+KernelVersion: 6.15
+Contact: linux-edac@vger.kernel.org
+Description:
+ (RO) Supported minimum scrub cycle duration in seconds
+ by the memory scrubber.
+
+ Device-based scrub: returns the minimum scrub cycle
+ supported by the memory device.
+
+ Region-based scrub: returns the max of minimum scrub cycles
+ supported by individual memory devices that back the region.
+
+What: /sys/bus/edac/devices/<dev-name>/scrubX/max_cycle_duration
+Date: March 2025
+KernelVersion: 6.15
+Contact: linux-edac@vger.kernel.org
+Description:
+ (RO) Supported maximum scrub cycle duration in seconds
+ by the memory scrubber.
+
+ Device-based scrub: returns the maximum scrub cycle supported
+ by the memory device.
+
+ Region-based scrub: returns the min of maximum scrub cycles
+ supported by individual memory devices that back the region.
+
+ If the memory device does not provide maximum scrub cycle
+ information, return the maximum supported value of the scrub
+ cycle field.
+
+What: /sys/bus/edac/devices/<dev-name>/scrubX/current_cycle_duration
+Date: March 2025
+KernelVersion: 6.15
+Contact: linux-edac@vger.kernel.org
+Description:
+ (RW) The current scrub cycle duration in seconds and must be
+ within the supported range by the memory scrubber.
+
+ Scrub has an overhead when running and that may want to be
+ reduced by taking longer to do it.
diff --git a/Documentation/ABI/testing/sysfs-firmware-acpi b/Documentation/ABI/testing/sysfs-firmware-acpi
index 5249ad5a96d9..72e7c9161ce7 100644
--- a/Documentation/ABI/testing/sysfs-firmware-acpi
+++ b/Documentation/ABI/testing/sysfs-firmware-acpi
@@ -108,15 +108,15 @@ Description:
number of a "General Purpose Events" (GPE).
A GPE vectors to a specified handler in AML, which
- can do a anything the BIOS writer wants from
+ can do anything the BIOS writer wants from
OS context. GPE 0x12, for example, would vector
to a level or edge handler called _L12 or _E12.
The handler may do its business and return.
- Or the handler may send send a Notify event
+ Or the handler may send a Notify event
to a Linux device driver registered on an ACPI device,
such as a battery, or a processor.
- To figure out where all the SCI's are coming from,
+ To figure out where all the SCIs are coming from,
/sys/firmware/acpi/interrupts contains a file listing
every possible source, and the count of how many
times it has triggered::
@@ -248,3 +248,24 @@ Description:
# cat ff_pwr_btn
7 enabled
+What: /sys/firmware/acpi/memory_ranges/rangeX
+Date: February 2025
+Contact: Tony Luck <tony.luck@intel.com>
+Description:
+ On systems with the ACPI MRRM table reports the parameters for
+ each range.
+
+ base: Starting system physical address.
+
+ length: Length of this range in bytes.
+
+ node: NUMA node that this range belongs to. Negative numbers
+ indicate that the node number could not be determined (e.g
+ for an address range that is reserved for future hot add of
+ memory).
+
+ local_region_id: ID associated with access by agents
+ local to this range of addresses.
+
+ remote_region_id: ID associated with access by agents
+ non-local to this range of addresses.
diff --git a/Documentation/ABI/testing/sysfs-firmware-efi b/Documentation/ABI/testing/sysfs-firmware-efi
index 5e4d0b27cdfe..927e362d4974 100644
--- a/Documentation/ABI/testing/sysfs-firmware-efi
+++ b/Documentation/ABI/testing/sysfs-firmware-efi
@@ -36,3 +36,10 @@ Description: Displays the content of the Runtime Configuration Interface
Table version 2 on Dell EMC PowerEdge systems in binary format
Users: It is used by Dell EMC OpenManage Server Administrator tool to
populate BIOS setup page.
+
+What: /sys/firmware/efi/ovmf_debug_log
+Date: July 2025
+Contact: Gerd Hoffmann <kraxel@redhat.com>, linux-efi@vger.kernel.org
+Description: Displays the content of the OVMF debug log buffer. The file is
+ only present in case the firmware supports logging to a memory
+ buffer.
diff --git a/Documentation/ABI/testing/sysfs-firmware-opal-powercap b/Documentation/ABI/testing/sysfs-firmware-opal-powercap
index c9b66ec4f165..d2d12ee89288 100644
--- a/Documentation/ABI/testing/sysfs-firmware-opal-powercap
+++ b/Documentation/ABI/testing/sysfs-firmware-opal-powercap
@@ -1,6 +1,6 @@
What: /sys/firmware/opal/powercap
Date: August 2017
-Contact: Linux for PowerPC mailing list <linuxppc-dev@ozlabs.org>
+Contact: Linux for PowerPC mailing list <linuxppc-dev@lists.ozlabs.org>
Description: Powercap directory for Powernv (P8, P9) servers
Each folder in this directory contains a
@@ -11,7 +11,7 @@ What: /sys/firmware/opal/powercap/system-powercap
/sys/firmware/opal/powercap/system-powercap/powercap-max
/sys/firmware/opal/powercap/system-powercap/powercap-current
Date: August 2017
-Contact: Linux for PowerPC mailing list <linuxppc-dev@ozlabs.org>
+Contact: Linux for PowerPC mailing list <linuxppc-dev@lists.ozlabs.org>
Description: System powercap directory and attributes applicable for
Powernv (P8, P9) servers
diff --git a/Documentation/ABI/testing/sysfs-firmware-opal-psr b/Documentation/ABI/testing/sysfs-firmware-opal-psr
index cc2ece70e365..1e55b56a0f89 100644
--- a/Documentation/ABI/testing/sysfs-firmware-opal-psr
+++ b/Documentation/ABI/testing/sysfs-firmware-opal-psr
@@ -1,6 +1,6 @@
What: /sys/firmware/opal/psr
Date: August 2017
-Contact: Linux for PowerPC mailing list <linuxppc-dev@ozlabs.org>
+Contact: Linux for PowerPC mailing list <linuxppc-dev@lists.ozlabs.org>
Description: Power-Shift-Ratio directory for Powernv P9 servers
Power-Shift-Ratio allows to provide hints the firmware
@@ -10,7 +10,7 @@ Description: Power-Shift-Ratio directory for Powernv P9 servers
What: /sys/firmware/opal/psr/cpu_to_gpu_X
Date: August 2017
-Contact: Linux for PowerPC mailing list <linuxppc-dev@ozlabs.org>
+Contact: Linux for PowerPC mailing list <linuxppc-dev@lists.ozlabs.org>
Description: PSR sysfs attributes for Powernv P9 servers
Power-Shift-Ratio between CPU and GPU for a given chip
diff --git a/Documentation/ABI/testing/sysfs-firmware-opal-sensor-groups b/Documentation/ABI/testing/sysfs-firmware-opal-sensor-groups
index 3a2dfe542e8c..fcb1fb4795b6 100644
--- a/Documentation/ABI/testing/sysfs-firmware-opal-sensor-groups
+++ b/Documentation/ABI/testing/sysfs-firmware-opal-sensor-groups
@@ -1,6 +1,6 @@
What: /sys/firmware/opal/sensor_groups
Date: August 2017
-Contact: Linux for PowerPC mailing list <linuxppc-dev@ozlabs.org>
+Contact: Linux for PowerPC mailing list <linuxppc-dev@lists.ozlabs.org>
Description: Sensor groups directory for POWER9 powernv servers
Each folder in this directory contains a sensor group
@@ -11,7 +11,7 @@ Description: Sensor groups directory for POWER9 powernv servers
What: /sys/firmware/opal/sensor_groups/<sensor_group_name>/clear
Date: August 2017
-Contact: Linux for PowerPC mailing list <linuxppc-dev@ozlabs.org>
+Contact: Linux for PowerPC mailing list <linuxppc-dev@lists.ozlabs.org>
Description: Sysfs file to clear the min-max of all the sensors
belonging to the group.
diff --git a/Documentation/ABI/testing/sysfs-firmware-papr-energy-scale-info b/Documentation/ABI/testing/sysfs-firmware-papr-energy-scale-info
index 141a6b371469..f5cefb81ac9d 100644
--- a/Documentation/ABI/testing/sysfs-firmware-papr-energy-scale-info
+++ b/Documentation/ABI/testing/sysfs-firmware-papr-energy-scale-info
@@ -1,6 +1,6 @@
What: /sys/firmware/papr/energy_scale_info
Date: February 2022
-Contact: Linux for PowerPC mailing list <linuxppc-dev@ozlabs.org>
+Contact: Linux for PowerPC mailing list <linuxppc-dev@lists.ozlabs.org>
Description: Directory hosting a set of platform attributes like
energy/frequency on Linux running as a PAPR guest.
@@ -10,20 +10,20 @@ Description: Directory hosting a set of platform attributes like
What: /sys/firmware/papr/energy_scale_info/<id>
Date: February 2022
-Contact: Linux for PowerPC mailing list <linuxppc-dev@ozlabs.org>
+Contact: Linux for PowerPC mailing list <linuxppc-dev@lists.ozlabs.org>
Description: Energy, frequency attributes directory for POWERVM servers
What: /sys/firmware/papr/energy_scale_info/<id>/desc
Date: February 2022
-Contact: Linux for PowerPC mailing list <linuxppc-dev@ozlabs.org>
+Contact: Linux for PowerPC mailing list <linuxppc-dev@lists.ozlabs.org>
Description: String description of the energy attribute of <id>
What: /sys/firmware/papr/energy_scale_info/<id>/value
Date: February 2022
-Contact: Linux for PowerPC mailing list <linuxppc-dev@ozlabs.org>
+Contact: Linux for PowerPC mailing list <linuxppc-dev@lists.ozlabs.org>
Description: Numeric value of the energy attribute of <id>
What: /sys/firmware/papr/energy_scale_info/<id>/value_desc
Date: February 2022
-Contact: Linux for PowerPC mailing list <linuxppc-dev@ozlabs.org>
+Contact: Linux for PowerPC mailing list <linuxppc-dev@lists.ozlabs.org>
Description: String value of the energy attribute of <id>
diff --git a/Documentation/ABI/testing/sysfs-firmware-turris-mox-rwtm b/Documentation/ABI/testing/sysfs-firmware-turris-mox-rwtm
index ea5e5b489bc7..26741cb84504 100644
--- a/Documentation/ABI/testing/sysfs-firmware-turris-mox-rwtm
+++ b/Documentation/ABI/testing/sysfs-firmware-turris-mox-rwtm
@@ -12,15 +12,6 @@ Contact: Marek Behún <kabel@kernel.org>
Description: (Read) MAC addresses burned into eFuses of this Turris Mox board.
Format: %pM
-What: /sys/firmware/turris-mox-rwtm/pubkey
-Date: August 2019
-KernelVersion: 5.4
-Contact: Marek Behún <kabel@kernel.org>
-Description: (Read) ECDSA public key (in pubkey hex compressed form) computed
- as pair to the ECDSA private key burned into eFuses of this
- Turris Mox Board.
- Format: string
-
What: /sys/firmware/turris-mox-rwtm/ram_size
Date: August 2019
KernelVersion: 5.4
diff --git a/Documentation/ABI/testing/sysfs-fs-erofs b/Documentation/ABI/testing/sysfs-fs-erofs
index 284224d1b56f..76d9808ed581 100644
--- a/Documentation/ABI/testing/sysfs-fs-erofs
+++ b/Documentation/ABI/testing/sysfs-fs-erofs
@@ -5,7 +5,7 @@ Description: Shows all enabled kernel features.
Supported features:
zero_padding, compr_cfgs, big_pcluster, chunked_file,
device_table, compr_head2, sb_chksum, ztailpacking,
- dedupe, fragments.
+ dedupe, fragments, 48bit, metabox.
What: /sys/fs/erofs/<disk>/sync_decompress
Date: November 2021
@@ -16,3 +16,30 @@ Description: Control strategy of sync decompression:
readahead on atomic contexts only.
- 1 (force on): enable for readpage and readahead.
- 2 (force off): disable for all situations.
+
+What: /sys/fs/erofs/<disk>/drop_caches
+Date: November 2024
+Contact: "Guo Chunhai" <guochunhai@vivo.com>
+Description: Writing to this will drop compression-related caches,
+ currently used to drop in-memory pclusters and cached
+ compressed folios:
+
+ - 1 : invalidate cached compressed folios
+ - 2 : drop in-memory pclusters
+ - 3 : drop in-memory pclusters and cached compressed folios
+
+What: /sys/fs/erofs/accel
+Date: May 2025
+Contact: "Bo Liu" <liubo03@inspur.com>
+Description: Used to set or show hardware accelerators in effect
+ and multiple accelerators are separated by '\n'.
+ Supported accelerator(s): qat_deflate.
+ Disable all accelerators with an empty string (echo > accel).
+
+What: /sys/fs/erofs/<disk>/dir_ra_bytes
+Date: July 2025
+Contact: "Chao Yu" <chao@kernel.org>
+Description: Used to set or show readahead bytes during readdir(), by
+ default the value is 16384.
+
+ - 0: disable readahead.
diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index 99fa87a43926..770470e0598b 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -205,7 +205,7 @@ Description: Controls the idle timing of system, if there is no FS operation
What: /sys/fs/f2fs/<disk>/discard_idle_interval
Date: September 2018
Contact: "Chao Yu" <yuchao0@huawei.com>
-Contact: "Sahitya Tummala" <stummala@codeaurora.org>
+Contact: "Sahitya Tummala" <quic_stummala@quicinc.com>
Description: Controls the idle timing of discard thread given
this time interval.
Default is 5 secs.
@@ -213,7 +213,7 @@ Description: Controls the idle timing of discard thread given
What: /sys/fs/f2fs/<disk>/gc_idle_interval
Date: September 2018
Contact: "Chao Yu" <yuchao0@huawei.com>
-Contact: "Sahitya Tummala" <stummala@codeaurora.org>
+Contact: "Sahitya Tummala" <quic_stummala@quicinc.com>
Description: Controls the idle timing for gc path. Set to 5 seconds by default.
What: /sys/fs/f2fs/<disk>/iostat_enable
@@ -270,7 +270,7 @@ Description: Shows all enabled kernel features.
inode_checksum, flexible_inline_xattr, quota_ino,
inode_crtime, lost_found, verity, sb_checksum,
casefold, readonly, compression, test_dummy_encryption_v2,
- atomic_write, pin_file, encrypted_casefold.
+ atomic_write, pin_file, encrypted_casefold, linear_lookup.
What: /sys/fs/f2fs/<disk>/inject_rate
Date: May 2016
@@ -311,10 +311,13 @@ Description: Do background GC aggressively when set. Set to 0 by default.
GC approach and turns SSR mode on.
gc urgent low(2): lowers the bar of checking I/O idling in
order to process outstanding discard commands and GC a
- little bit aggressively. uses cost benefit GC approach.
+ little bit aggressively. always uses cost benefit GC approach,
+ and will override age-threshold GC approach if ATGC is enabled
+ at the same time.
gc urgent mid(3): does GC forcibly in a period of given
gc_urgent_sleep_time and executes a mid level of I/O idling check.
- uses cost benefit GC approach.
+ always uses cost benefit GC approach, and will override
+ age-threshold GC approach if ATGC is enabled at the same time.
What: /sys/fs/f2fs/<disk>/gc_urgent_sleep_time
Date: August 2017
@@ -331,7 +334,7 @@ Date: January 2018
Contact: Jaegeuk Kim <jaegeuk@kernel.org>
Description: This indicates how many GC can be failed for the pinned
file. If it exceeds this, F2FS doesn't guarantee its pinning
- state. 2048 trials is set by default.
+ state. 2048 trials is set by default, and 65535 as maximum.
What: /sys/fs/f2fs/<disk>/extension_list
Date: February 2018
@@ -344,7 +347,7 @@ Description: Used to control configure extension list:
- [c] means add/del cold file extension
What: /sys/fs/f2fs/<disk>/unusable
-Date April 2019
+Date: April 2019
Contact: "Daniel Rosenberg" <drosen@google.com>
Description: If checkpoint=disable, it displays the number of blocks that
are unusable.
@@ -352,7 +355,7 @@ Description: If checkpoint=disable, it displays the number of blocks that
would be unusable if checkpoint=disable were to be set.
What: /sys/fs/f2fs/<disk>/encoding
-Date July 2019
+Date: July 2019
Contact: "Daniel Rosenberg" <drosen@google.com>
Description: Displays name and version of the encoding set for the filesystem.
If no encoding is set, displays (none)
@@ -579,6 +582,12 @@ Description: When ATGC is on, it controls age threshold to bypass GCing young
candidates whose age is not beyond the threshold, by default it was
initialized as 604800 seconds (equals to 7 days).
+What: /sys/fs/f2fs/<disk>/atgc_enabled
+Date: Feb 2024
+Contact: "Jinbao Liu" <liujinbao1@xiaomi.com>
+Description: It represents whether ATGC is on or off. The value is 1 which
+ indicates that ATGC is on, and 0 indicates that it is off.
+
What: /sys/fs/f2fs/<disk>/gc_reclaimed_segments
Date: July 2021
Contact: "Daeho Jeong" <daehojeong@google.com>
@@ -634,6 +643,12 @@ Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
Description: Shows the number of unusable blocks in a section which was defined by
the zone capacity reported by underlying zoned device.
+What: /sys/fs/f2fs/<disk>/max_open_zones
+Date: November 2025
+Contact: "Yongpeng Yang" <yangyongpeng@xiaomi.com>
+Description: Shows the max number of zones that F2FS can write concurrently when a zoned
+ device is mounted.
+
What: /sys/fs/f2fs/<disk>/current_atomic_write
Date: July 2022
Contact: "Daeho Jeong" <daehojeong@google.com>
@@ -701,29 +716,34 @@ Description: Support configuring fault injection type, should be
enabled with fault_injection option, fault type value
is shown below, it supports single or combined type.
- =================== ===========
- Type_Name Type_Value
- =================== ===========
- FAULT_KMALLOC 0x000000001
- FAULT_KVMALLOC 0x000000002
- FAULT_PAGE_ALLOC 0x000000004
- FAULT_PAGE_GET 0x000000008
- FAULT_ALLOC_BIO 0x000000010 (obsolete)
- FAULT_ALLOC_NID 0x000000020
- FAULT_ORPHAN 0x000000040
- FAULT_BLOCK 0x000000080
- FAULT_DIR_DEPTH 0x000000100
- FAULT_EVICT_INODE 0x000000200
- FAULT_TRUNCATE 0x000000400
- FAULT_READ_IO 0x000000800
- FAULT_CHECKPOINT 0x000001000
- FAULT_DISCARD 0x000002000
- FAULT_WRITE_IO 0x000004000
- FAULT_SLAB_ALLOC 0x000008000
- FAULT_DQUOT_INIT 0x000010000
- FAULT_LOCK_OP 0x000020000
- FAULT_BLKADDR 0x000040000
- =================== ===========
+ =========================== ==========
+ Type_Name Type_Value
+ =========================== ==========
+ FAULT_KMALLOC 0x00000001
+ FAULT_KVMALLOC 0x00000002
+ FAULT_PAGE_ALLOC 0x00000004
+ FAULT_PAGE_GET 0x00000008
+ FAULT_ALLOC_BIO 0x00000010 (obsolete)
+ FAULT_ALLOC_NID 0x00000020
+ FAULT_ORPHAN 0x00000040
+ FAULT_BLOCK 0x00000080
+ FAULT_DIR_DEPTH 0x00000100
+ FAULT_EVICT_INODE 0x00000200
+ FAULT_TRUNCATE 0x00000400
+ FAULT_READ_IO 0x00000800
+ FAULT_CHECKPOINT 0x00001000
+ FAULT_DISCARD 0x00002000
+ FAULT_WRITE_IO 0x00004000
+ FAULT_SLAB_ALLOC 0x00008000
+ FAULT_DQUOT_INIT 0x00010000
+ FAULT_LOCK_OP 0x00020000
+ FAULT_BLKADDR_VALIDITY 0x00040000
+ FAULT_BLKADDR_CONSISTENCE 0x00080000
+ FAULT_NO_SEGMENT 0x00100000
+ FAULT_INCONSISTENT_FOOTER 0x00200000
+ FAULT_TIMEOUT 0x00400000 (1000ms)
+ FAULT_VMALLOC 0x00800000
+ =========================== ==========
What: /sys/fs/f2fs/<disk>/discard_io_aware_gran
Date: January 2023
@@ -761,3 +781,161 @@ Date: November 2023
Contact: "Chao Yu" <chao@kernel.org>
Description: It controls to enable/disable IO aware feature for background discard.
By default, the value is 1 which indicates IO aware is on.
+
+What: /sys/fs/f2fs/<disk>/blkzone_alloc_policy
+Date: July 2024
+Contact: "Yuanhong Liao" <liaoyuanhong@vivo.com>
+Description: The zone UFS we are currently using consists of two parts:
+ conventional zones and sequential zones. It can be used to control which part
+ to prioritize for writes, with a default value of 0.
+
+ ======================== =========================================
+ value description
+ blkzone_alloc_policy = 0 Prioritize writing to sequential zones
+ blkzone_alloc_policy = 1 Only allow writing to sequential zones
+ blkzone_alloc_policy = 2 Prioritize writing to conventional zones
+ ======================== =========================================
+
+What: /sys/fs/f2fs/<disk>/migration_window_granularity
+Date: September 2024
+Contact: "Daeho Jeong" <daehojeong@google.com>
+Description: Controls migration window granularity of garbage collection on large
+ section. it can control the scanning window granularity for GC migration
+ in a unit of segment, while migration_granularity controls the number
+ of segments which can be migrated at the same turn.
+
+What: /sys/fs/f2fs/<disk>/reserved_segments
+Date: September 2024
+Contact: "Daeho Jeong" <daehojeong@google.com>
+Description: In order to fine tune GC behavior, we can control the number of
+ reserved segments.
+
+What: /sys/fs/f2fs/<disk>/gc_no_zoned_gc_percent
+Date: September 2024
+Contact: "Daeho Jeong" <daehojeong@google.com>
+Description: If the percentage of free sections over total sections is above this
+ number, F2FS do not garbage collection for zoned devices through the
+ background GC thread. the default number is "60".
+
+What: /sys/fs/f2fs/<disk>/gc_boost_zoned_gc_percent
+Date: September 2024
+Contact: "Daeho Jeong" <daehojeong@google.com>
+Description: If the percentage of free sections over total sections is under this
+ number, F2FS boosts garbage collection for zoned devices through the
+ background GC thread. the default number is "25".
+
+What: /sys/fs/f2fs/<disk>/gc_valid_thresh_ratio
+Date: September 2024
+Contact: "Daeho Jeong" <daehojeong@google.com>
+Description: It controls the valid block ratio threshold not to trigger excessive GC
+ for zoned devices. The initial value of it is 95(%). F2FS will stop the
+ background GC thread from initiating GC for sections having valid blocks
+ exceeding the ratio.
+
+What: /sys/fs/f2fs/<disk>/max_read_extent_count
+Date: November 2024
+Contact: "Chao Yu" <chao@kernel.org>
+Description: It controls max read extent count for per-inode, the value of threshold
+ is 10240 by default.
+
+What: /sys/fs/f2fs/tuning/reclaim_caches_kb
+Date: February 2025
+Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
+Description: It reclaims the given KBs of file-backed pages registered by
+ ioctl(F2FS_IOC_DONATE_RANGE).
+ For example, writing N tries to drop N KBs spaces in LRU.
+
+What: /sys/fs/f2fs/<disk>/carve_out
+Date: March 2025
+Contact: "Daeho Jeong" <daehojeong@google.com>
+Description: For several zoned storage devices, vendors will provide extra space which
+ was used for device level GC than specs and F2FS can use this space for
+ filesystem level GC. To do that, we can reserve the space using
+ reserved_blocks. However, it is not enough, since this extra space should
+ not be shown to users. So, with this new sysfs node, we can hide the space
+ by subtracting reserved_blocks from total bytes.
+
+What: /sys/fs/f2fs/<disk>/encoding_flags
+Date: April 2025
+Contact: "Chao Yu" <chao@kernel.org>
+Description: This is a read-only entry to show the value of sb.s_encoding_flags, the
+ value is hexadecimal.
+
+ ============================ ==========
+ Flag_Name Flag_Value
+ ============================ ==========
+ SB_ENC_STRICT_MODE_FL 0x00000001
+ SB_ENC_NO_COMPAT_FALLBACK_FL 0x00000002
+ ============================ ==========
+
+What: /sys/fs/f2fs/<disk>/reserved_pin_section
+Date: June 2025
+Contact: "Chao Yu" <chao@kernel.org>
+Description: This threshold is used to control triggering garbage collection while
+ fallocating on pinned file, so, it can guarantee there is enough free
+ reserved section before preallocating on pinned file.
+ By default, the value is ovp_sections, especially, for zoned ufs, the
+ value is 1.
+
+What: /sys/fs/f2fs/<disk>/gc_boost_gc_multiple
+Date: June 2025
+Contact: "Daeho Jeong" <daehojeong@google.com>
+Description: Set a multiplier for the background GC migration window when F2FS GC is
+ boosted. The range should be from 1 to the segment count in a section.
+ Default: 5
+
+What: /sys/fs/f2fs/<disk>/gc_boost_gc_greedy
+Date: June 2025
+Contact: "Daeho Jeong" <daehojeong@google.com>
+Description: Control GC algorithm for boost GC. 0: cost benefit, 1: greedy
+ Default: 1
+
+What: /sys/fs/f2fs/<disk>/effective_lookup_mode
+Date: August 2025
+Contact: "Daniel Lee" <chullee@google.com>
+Description:
+ This is a read-only entry to show the effective directory lookup mode
+ F2FS is currently using for casefolded directories.
+ This considers both the "lookup_mode" mount option and the on-disk
+ encoding flag, SB_ENC_NO_COMPAT_FALLBACK_FL.
+
+ Possible values are:
+ - "perf": Hash-only lookup.
+ - "compat": Hash-based lookup with a linear search fallback enabled
+ - "auto:perf": lookup_mode is auto and fallback is disabled on-disk
+ - "auto:compat": lookup_mode is auto and fallback is enabled on-disk
+
+What: /sys/fs/f2fs/<disk>/bggc_io_aware
+Date: August 2025
+Contact: "Liao Yuanhong" <liaoyuanhong@vivo.com>
+Description: Used to adjust the BG_GC priority when pending IO, with a default value
+ of 0. Specifically, for ZUFS, the default value is 1.
+
+ ================== ======================================================
+ value description
+ bggc_io_aware = 0 skip background GC if there is any kind of pending IO
+ bggc_io_aware = 1 skip background GC if there is pending read IO
+ bggc_io_aware = 2 don't aware IO for background GC
+ ================== ======================================================
+
+What: /sys/fs/f2fs/<disk>/allocate_section_hint
+Date: August 2025
+Contact: "Liao Yuanhong" <liaoyuanhong@vivo.com>
+Description: Indicates the hint section between the first device and others in multi-devices
+ setup. It defaults to the end of the first device in sections. For a single storage
+ device, it defaults to the total number of sections. It can be manually set to match
+ scenarios where multi-devices are mapped to the same dm device.
+
+What: /sys/fs/f2fs/<disk>/allocate_section_policy
+Date: August 2025
+Contact: "Liao Yuanhong" <liaoyuanhong@vivo.com>
+Description: Controls write priority in multi-devices setups. A value of 0 means normal writing.
+ A value of 1 prioritizes writing to devices before the allocate_section_hint. A value of 2
+ prioritizes writing to devices after the allocate_section_hint. The default is 0.
+
+ =========================== ==========================================================
+ value description
+ allocate_section_policy = 0 Normal writing
+ allocate_section_policy = 1 Prioritize writing to section before allocate_section_hint
+ allocate_section_policy = 2 Prioritize writing to section after allocate_section_hint
+ =========================== ==========================================================
diff --git a/Documentation/ABI/testing/sysfs-fs-virtiofs b/Documentation/ABI/testing/sysfs-fs-virtiofs
new file mode 100644
index 000000000000..4839dbce997e
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-fs-virtiofs
@@ -0,0 +1,11 @@
+What: /sys/fs/virtiofs/<n>/tag
+Date: Feb 2024
+Contact: virtio-fs@lists.linux.dev
+Description:
+ [RO] The mount "tag" that can be used to mount this filesystem.
+
+What: /sys/fs/virtiofs/<n>/device
+Date: Feb 2024
+Contact: virtio-fs@lists.linux.dev
+Description:
+ Symlink to the virtio device that exports this filesystem.
diff --git a/Documentation/ABI/testing/sysfs-fs-xfs b/Documentation/ABI/testing/sysfs-fs-xfs
index f704925f6fe9..7da4de948b46 100644
--- a/Documentation/ABI/testing/sysfs-fs-xfs
+++ b/Documentation/ABI/testing/sysfs-fs-xfs
@@ -1,7 +1,7 @@
What: /sys/fs/xfs/<disk>/log/log_head_lsn
Date: July 2014
KernelVersion: 3.17
-Contact: xfs@oss.sgi.com
+Contact: linux-xfs@vger.kernel.org
Description:
The log sequence number (LSN) of the current head of the
log. The LSN is exported in "cycle:basic block" format.
@@ -10,30 +10,28 @@ Users: xfstests
What: /sys/fs/xfs/<disk>/log/log_tail_lsn
Date: July 2014
KernelVersion: 3.17
-Contact: xfs@oss.sgi.com
+Contact: linux-xfs@vger.kernel.org
Description:
The log sequence number (LSN) of the current tail of the
log. The LSN is exported in "cycle:basic block" format.
-What: /sys/fs/xfs/<disk>/log/reserve_grant_head
-Date: July 2014
-KernelVersion: 3.17
-Contact: xfs@oss.sgi.com
+What: /sys/fs/xfs/<disk>/log/reserve_grant_head_bytes
+Date: June 2024
+KernelVersion: 6.11
+Contact: linux-xfs@vger.kernel.org
Description:
The current state of the log reserve grant head. It
represents the total log reservation of all currently
- outstanding transactions. The grant head is exported in
- "cycle:bytes" format.
+ outstanding transactions in bytes.
Users: xfstests
-What: /sys/fs/xfs/<disk>/log/write_grant_head
-Date: July 2014
-KernelVersion: 3.17
-Contact: xfs@oss.sgi.com
+What: /sys/fs/xfs/<disk>/log/write_grant_head_bytes
+Date: June 2024
+KernelVersion: 6.11
+Contact: linux-xfs@vger.kernel.org
Description:
The current state of the log write grant head. It
represents the total log reservation of all currently
outstanding transactions, including regrants due to
- rolling transactions. The grant head is exported in
- "cycle:bytes" format.
+ rolling transactions in bytes.
Users: xfstests
diff --git a/Documentation/ABI/testing/sysfs-kernel-address_bits b/Documentation/ABI/testing/sysfs-kernel-address_bits
index 5d09ff84d4d6..3b72e48086aa 100644
--- a/Documentation/ABI/testing/sysfs-kernel-address_bits
+++ b/Documentation/ABI/testing/sysfs-kernel-address_bits
@@ -1,4 +1,4 @@
-What: /sys/kernel/address_bit
+What: /sys/kernel/address_bits
Date: May 2023
KernelVersion: 6.3
Contact: Thomas Weißschuh <linux@weissschuh.net>
diff --git a/Documentation/ABI/testing/sysfs-kernel-fadump b/Documentation/ABI/testing/sysfs-kernel-fadump
index 8f7a64a81783..b64b7622e6fc 100644
--- a/Documentation/ABI/testing/sysfs-kernel-fadump
+++ b/Documentation/ABI/testing/sysfs-kernel-fadump
@@ -38,3 +38,22 @@ Contact: linuxppc-dev@lists.ozlabs.org
Description: read only
Provide information about the amount of memory reserved by
FADump to save the crash dump in bytes.
+
+What: /sys/kernel/fadump/hotplug_ready
+Date: Apr 2024
+Contact: linuxppc-dev@lists.ozlabs.org
+Description: read only
+ Kdump udev rule re-registers fadump on memory add/remove events,
+ primarily to update the elfcorehdr. This sysfs indicates the
+ kdump udev rule that fadump re-registration is not required on
+ memory add/remove events because elfcorehdr is now prepared in
+ the second/fadump kernel.
+User: kexec-tools
+
+What: /sys/kernel/fadump/bootargs_append
+Date: May 2024
+Contact: linuxppc-dev@lists.ozlabs.org
+Description: read/write
+ This is a special sysfs file available to setup additional
+ parameters to be passed to capture kernel. For HASH MMU it
+ is exported only if RMA size higher than 768MB.
diff --git a/Documentation/ABI/testing/sysfs-kernel-hardlockup_count b/Documentation/ABI/testing/sysfs-kernel-hardlockup_count
new file mode 100644
index 000000000000..dfdd4078b077
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-kernel-hardlockup_count
@@ -0,0 +1,7 @@
+What: /sys/kernel/hardlockup_count
+Date: May 2025
+KernelVersion: 6.16
+Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
+Description:
+ Shows how many times the system has detected a hard lockup since last boot.
+ Available only if CONFIG_HARDLOCKUP_DETECTOR is enabled.
diff --git a/Documentation/ABI/testing/sysfs-kernel-kexec-kdump b/Documentation/ABI/testing/sysfs-kernel-kexec-kdump
new file mode 100644
index 000000000000..f59051b5d96d
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-kernel-kexec-kdump
@@ -0,0 +1,61 @@
+What: /sys/kernel/kexec/*
+Date: Nov 2025
+Contact: kexec@lists.infradead.org
+Description:
+ The /sys/kernel/kexec/* directory contains sysfs files
+ that provide information about the configuration status
+ of kexec and kdump.
+
+What: /sys/kernel/kexec/loaded
+Date: Nov 2025
+Contact: kexec@lists.infradead.org
+Description: read only
+ Indicates whether a new kernel image has been loaded
+ into memory using the kexec system call. It shows 1 if
+ a kexec image is present and ready to boot, or 0 if none
+ is loaded.
+User: kexec tools, kdump service
+
+What: /sys/kernel/kexec/crash_loaded
+Date: Nov 2025
+Contact: kexec@lists.infradead.org
+Description: read only
+ Indicates whether a crash (kdump) kernel is currently
+ loaded into memory. It shows 1 if a crash kernel has been
+ successfully loaded for panic handling, or 0 if no crash
+ kernel is present.
+User: Kexec tools, Kdump service
+
+What: /sys/kernel/kexec/crash_size
+Date: Nov 2025
+Contact: kexec@lists.infradead.org
+Description: read/write
+ Shows the amount of memory reserved for loading the crash
+ (kdump) kernel. It reports the size, in bytes, of the
+ crash kernel area defined by the crashkernel= parameter.
+ This interface also allows reducing the crashkernel
+ reservation by writing a smaller value, and the reclaimed
+ space is added back to the system RAM.
+User: Kdump service
+
+What: /sys/kernel/kexec/crash_elfcorehdr_size
+Date: Nov 2025
+Contact: kexec@lists.infradead.org
+Description: read only
+ Indicates the preferred size of the memory buffer for the
+ ELF core header used by the crash (kdump) kernel. It defines
+ how much space is needed to hold metadata about the crashed
+ system, including CPU and memory information. This information
+ is used by the user space utility kexec to support updating the
+ in-kernel kdump image during hotplug operations.
+User: Kexec tools
+
+What: /sys/kernel/kexec/crash_cma_ranges
+Date: Nov 2025
+Contact: kexec@lists.infradead.org
+Description: read only
+ Provides information about the memory ranges reserved from
+ the Contiguous Memory Allocator (CMA) area that are allocated
+ to the crash (kdump) kernel. It lists the start and end physical
+ addresses of CMA regions assigned for crashkernel use.
+User: kdump service
diff --git a/Documentation/ABI/testing/sysfs-kernel-livepatch b/Documentation/ABI/testing/sysfs-kernel-livepatch
index a5df9b4910dc..3c3f36b32b57 100644
--- a/Documentation/ABI/testing/sysfs-kernel-livepatch
+++ b/Documentation/ABI/testing/sysfs-kernel-livepatch
@@ -47,6 +47,23 @@ Description:
disabled when the feature is used. See
Documentation/livepatch/livepatch.rst for more information.
+What: /sys/kernel/livepatch/<patch>/replace
+Date: Jun 2024
+KernelVersion: 6.11.0
+Contact: live-patching@vger.kernel.org
+Description:
+ An attribute which indicates whether the patch supports
+ atomic-replace.
+
+What: /sys/kernel/livepatch/<patch>/stack_order
+Date: Jan 2025
+KernelVersion: 6.14.0
+Description:
+ This attribute specifies the sequence in which live patch modules
+ are applied to the system. If multiple live patches modify the same
+ function, the implementation with the biggest 'stack_order' number
+ is used, unless a transition is currently in progress.
+
What: /sys/kernel/livepatch/<patch>/<object>
Date: Nov 2014
KernelVersion: 3.19.0
diff --git a/Documentation/ABI/testing/sysfs-kernel-mm-cma b/Documentation/ABI/testing/sysfs-kernel-mm-cma
index 02b2bb60c296..aaf2a5d8b13b 100644
--- a/Documentation/ABI/testing/sysfs-kernel-mm-cma
+++ b/Documentation/ABI/testing/sysfs-kernel-mm-cma
@@ -23,3 +23,22 @@ Date: Feb 2021
Contact: Minchan Kim <minchan@kernel.org>
Description:
the number of pages CMA API failed to allocate
+
+What: /sys/kernel/mm/cma/<cma-heap-name>/release_pages_success
+Date: Feb 2024
+Contact: Anshuman Khandual <anshuman.khandual@arm.com>
+Description:
+ the number of pages CMA API succeeded to release
+
+What: /sys/kernel/mm/cma/<cma-heap-name>/total_pages
+Date: Jun 2024
+Contact: Frank van der Linden <fvdl@google.com>
+Description:
+ The size of the CMA area in pages.
+
+What: /sys/kernel/mm/cma/<cma-heap-name>/available_pages
+Date: Jun 2024
+Contact: Frank van der Linden <fvdl@google.com>
+Description:
+ The number of pages in the CMA area that are still
+ available for CMA allocation.
diff --git a/Documentation/ABI/testing/sysfs-kernel-mm-damon b/Documentation/ABI/testing/sysfs-kernel-mm-damon
index bfa5b8288d8d..4fb8b7a6d625 100644
--- a/Documentation/ABI/testing/sysfs-kernel-mm-damon
+++ b/Documentation/ABI/testing/sysfs-kernel-mm-damon
@@ -34,7 +34,9 @@ Description: Writing 'on' or 'off' to this file makes the kdamond starts or
kdamond. Writing 'update_schemes_tried_bytes' to the file
updates only '.../tried_regions/total_bytes' files of this
kdamond. Writing 'clear_schemes_tried_regions' to the file
- removes contents of the 'tried_regions' directory.
+ removes contents of the 'tried_regions' directory. Writing
+ 'update_schemes_effective_quotas' to the file updates
+ '.../quotas/effective_bytes' files of this kdamond.
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/pid
Date: Mar 2022
@@ -42,6 +44,13 @@ Contact: SeongJae Park <sj@kernel.org>
Description: Reading this file returns the pid of the kdamond if it is
running.
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/refresh_ms
+Date: Jul 2025
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing a value to this file sets the time interval for
+ automatic DAMON status file contents update. Writing '0'
+ disables the update. Reading this file returns the value.
+
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/nr_contexts
Date: Mar 2022
Contact: SeongJae Park <sj@kernel.org>
@@ -68,6 +77,13 @@ Description: Writing a keyword for a monitoring operations set ('vaddr' for
Note that only the operations sets that listed in
'avail_operations' file are valid inputs.
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/addr_unit
+Date: Aug 2025
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing an integer to this file sets the 'address unit'
+ parameter of the given operations set of the context. Reading
+ the file returns the last-written 'address unit' value.
+
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/monitoring_attrs/intervals/sample_us
Date: Mar 2022
Contact: SeongJae Park <sj@kernel.org>
@@ -89,6 +105,36 @@ Description: Writing a value to this file sets the update interval of the
DAMON context in microseconds as the value. Reading this file
returns the value.
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/monitoring_attrs/intervals/intrvals_goal/access_bp
+Date: Feb 2025
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing a value to this file sets the monitoring intervals
+ auto-tuning target DAMON-observed access events ratio within
+ the given time interval (aggrs in same directory), in bp
+ (1/10,000). Reading this file returns the value.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/monitoring_attrs/intervals/intrvals_goal/aggrs
+Date: Feb 2025
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing a value to this file sets the time interval to achieve
+ the monitoring intervals auto-tuning target DAMON-observed
+ access events ratio (access_bp in same directory) within.
+ Reading this file returns the value.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/monitoring_attrs/intervals/intrvals_goal/min_sample_us
+Date: Feb 2025
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing a value to this file sets the minimum value of
+ auto-tuned sampling interval in microseconds. Reading this
+ file returns the value.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/monitoring_attrs/intervals/intrvals_goal/max_sample_us
+Date: Feb 2025
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing a value to this file sets the maximum value of
+ auto-tuned sampling interval in microseconds. Reading this
+ file returns the value.
+
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/monitoring_attrs/nr_regions/min
WDate: Mar 2022
@@ -118,6 +164,13 @@ Description: Writing to and reading from this file sets and gets the pid of
the target process if the context is for virtual address spaces
monitoring, respectively.
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/targets/<T>/obsolete_target
+Date: Oct 2025
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing to and reading from this file sets and gets the
+ obsoleteness of the matching parameters commit destination
+ target.
+
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/targets/<T>/regions/nr_regions
Date: Mar 2022
Contact: SeongJae Park <sj@kernel.org>
@@ -153,6 +206,12 @@ Contact: SeongJae Park <sj@kernel.org>
Description: Writing to and reading from this file sets and gets the action
of the scheme.
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/target_nid
+Date: Jun 2024
+Contact: SeongJae Park <sj@kernel.org>
+Description: Action's target NUMA node id. Supported by only relevant
+ actions.
+
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/apply_interval_us
Date: Sep 2023
Contact: SeongJae Park <sj@kernel.org>
@@ -208,6 +267,12 @@ Contact: SeongJae Park <sj@kernel.org>
Description: Writing to and reading from this file sets and gets the size
quota of the scheme in bytes.
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/quotas/effective_bytes
+Date: Feb 2024
+Contact: SeongJae Park <sj@kernel.org>
+Description: Reading from this file gets the effective size quota of the
+ scheme in bytes, which adjusted for the time quota and goals.
+
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/quotas/reset_interval_ms
Date: Mar 2022
Contact: SeongJae Park <sj@kernel.org>
@@ -221,6 +286,12 @@ Description: Writing a number 'N' to this file creates the number of
directories for setting automatic tuning of the scheme's
aggressiveness named '0' to 'N-1' under the goals/ directory.
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/quotas/goals/<G>/target_metric
+Date: Feb 2024
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing to and reading from this file sets and gets the quota
+ auto-tuning goal metric.
+
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/quotas/goals/<G>/target_value
Date: Nov 2023
Contact: SeongJae Park <sj@kernel.org>
@@ -233,6 +304,18 @@ Contact: SeongJae Park <sj@kernel.org>
Description: Writing to and reading from this file sets and gets the current
value of the goal metric.
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/quotas/goals/<G>/nid
+Date: Apr 2025
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing to and reading from this file sets and gets the nid
+ parameter of the goal.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/quotas/goals/<G>/path
+Date: Oct 2025
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing to and reading from this file sets and gets the path
+ parameter of the goal.
+
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/quotas/weights/sz_permil
Date: Mar 2022
Contact: SeongJae Park <sj@kernel.org>
@@ -300,9 +383,9 @@ Date: Dec 2022
Contact: SeongJae Park <sj@kernel.org>
Description: Writing to and reading from this file sets and gets the type of
the memory of the interest. 'anon' for anonymous pages,
- 'memcg' for specific memory cgroup, 'addr' for address range
- (an open-ended interval), or 'target' for DAMON monitoring
- target can be written and read.
+ 'memcg' for specific memory cgroup, 'young' for young pages,
+ 'addr' for address range (an open-ended interval), or 'target'
+ for DAMON monitoring target can be written and read.
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/filters/<F>/memcg_path
Date: Dec 2022
@@ -325,6 +408,20 @@ Description: If 'addr' is written to the 'type' file, writing to or reading
from this file sets or gets the end address of the address
range for the filter.
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/filters/<F>/min
+Date: Feb 2025
+Contact: SeongJae Park <sj@kernel.org>
+Description: If 'hugepage_size' is written to the 'type' file, writing to
+ or reading from this file sets or gets the minimum size of the
+ hugepage for the filter.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/filters/<F>/max
+Date: Feb 2025
+Contact: SeongJae Park <sj@kernel.org>
+Description: If 'hugepage_size' is written to the 'type' file, writing to
+ or reading from this file sets or gets the maximum size of the
+ hugepage for the filter.
+
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/filters/<F>/target_idx
Date: Dec 2022
Contact: SeongJae Park <sj@kernel.org>
@@ -335,10 +432,53 @@ Description: If 'target' is written to the 'type' file, writing to or
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/filters/<F>/matching
Date: Dec 2022
Contact: SeongJae Park <sj@kernel.org>
-Description: Writing 'Y' or 'N' to this file sets whether to filter out
- pages that do or do not match to the 'type' and 'memcg_path',
- respectively. Filter out means the action of the scheme will
- not be applied to.
+Description: Writing 'Y' or 'N' to this file sets whether the filter is for
+ the memory of the 'type', or all except the 'type'.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/filters/<F>/allow
+Date: Jan 2025
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing 'Y' or 'N' to this file sets whether to allow or reject
+ applying the scheme's action to the memory that satisfies the
+ 'type' and the 'matching' of the directory.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/core_filters
+Date: Feb 2025
+Contact: SeongJae Park <sj@kernel.org>
+Description: Directory for DAMON core layer-handled DAMOS filters. Files
+ under this directory works same to those of
+ /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/filters
+ directory.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/ops_filters
+Date: Feb 2025
+Contact: SeongJae Park <sj@kernel.org>
+Description: Directory for DAMON operations set layer-handled DAMOS filters.
+ Files under this directory works same to those of
+ /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/filters
+ directory.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/dests/nr_dests
+Date: Jul 2025
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing a number 'N' to this file creates the number of
+ directories for setting action destinations of the scheme named
+ '0' to 'N-1' under the dests/ directory.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/dests/<D>/id
+Date: Jul 2025
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing to and reading from this file sets and gets the id of
+ the DAMOS action destination. For DAMOS_MIGRATE_{HOT,COLD}
+ actions, the destination node's node id can be written and
+ read.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/dests/<D>/weight
+Date: Jul 2025
+Contact: SeongJae Park <sj@kernel.org>
+Description: Writing to and reading from this file sets and gets the weight
+ of the DAMOS action destination to select as the destination of
+ each action among the destinations.
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/stats/nr_tried
Date: Mar 2022
@@ -364,6 +504,12 @@ Contact: SeongJae Park <sj@kernel.org>
Description: Reading this file returns the total size of regions that the
action of the scheme has successfully applied in bytes.
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/stats/sz_ops_filter_passed
+Date: Dec 2024
+Contact: SeongJae Park <sj@kernel.org>
+Description: Reading this file returns the total size of memory that passed
+ DAMON operations layer-handled filters of the scheme in bytes.
+
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/stats/qt_exceeds
Date: Mar 2022
Contact: SeongJae Park <sj@kernel.org>
@@ -404,3 +550,10 @@ Contact: SeongJae Park <sj@kernel.org>
Description: Reading this file returns the 'age' of a memory region that
corresponding DAMON-based Operation Scheme's action has tried
to be applied.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/tried_regions/<R>/sz_filter_passed
+Date: Dec 2024
+Contact: SeongJae Park <sj@kernel.org>
+Description: Reading this file returns the size of the memory in the region
+ that passed DAMON operations layer-handled filters of the
+ scheme in bytes.
diff --git a/Documentation/ABI/testing/sysfs-kernel-mm-mempolicy b/Documentation/ABI/testing/sysfs-kernel-mm-mempolicy
new file mode 100644
index 000000000000..8ac327fd7fb6
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-kernel-mm-mempolicy
@@ -0,0 +1,4 @@
+What: /sys/kernel/mm/mempolicy/
+Date: January 2024
+Contact: Linux memory management mailing list <linux-mm@kvack.org>
+Description: Interface for Mempolicy
diff --git a/Documentation/ABI/testing/sysfs-kernel-mm-mempolicy-weighted-interleave b/Documentation/ABI/testing/sysfs-kernel-mm-mempolicy-weighted-interleave
new file mode 100644
index 000000000000..649c0e9b895c
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-kernel-mm-mempolicy-weighted-interleave
@@ -0,0 +1,54 @@
+What: /sys/kernel/mm/mempolicy/weighted_interleave/
+Date: January 2024
+Contact: Linux memory management mailing list <linux-mm@kvack.org>
+Description: Configuration Interface for the Weighted Interleave policy
+
+What: /sys/kernel/mm/mempolicy/weighted_interleave/nodeN
+Date: January 2024
+Contact: Linux memory management mailing list <linux-mm@kvack.org>
+Description: Weight configuration interface for nodeN
+
+ The interleave weight for a memory node (N). These weights are
+ utilized by tasks which have set their mempolicy to
+ MPOL_WEIGHTED_INTERLEAVE.
+
+ These weights only affect new allocations, and changes at runtime
+ will not cause migrations on already allocated pages.
+
+ The minimum weight for a node is always 1.
+
+ Minimum weight: 1
+ Maximum weight: 255
+
+ Writing invalid values (i.e. any values not in [1,255],
+ empty string, ...) will return -EINVAL.
+
+ Changing the weight to a valid value will automatically
+ switch the system to manual mode as well.
+
+What: /sys/kernel/mm/mempolicy/weighted_interleave/auto
+Date: May 2025
+Contact: Linux memory management mailing list <linux-mm@kvack.org>
+Description: Auto-weighting configuration interface
+
+ Configuration mode for weighted interleave. 'true' indicates
+ that the system is in auto mode, and a 'false' indicates that
+ the system is in manual mode.
+
+ In auto mode, all node weights are re-calculated and overwritten
+ (visible via the nodeN interfaces) whenever new bandwidth data
+ is made available during either boot or hotplug events.
+
+ In manual mode, node weights can only be updated by the user.
+ Note that nodes that are onlined with previously set weights
+ will reuse those weights. If they were not previously set or
+ are onlined with missing bandwidth data, the weights will use
+ a default weight of 1.
+
+ Writing any true value string (e.g. Y or 1) will enable auto
+ mode, while writing any false value string (e.g. N or 0) will
+ enable manual mode. All other strings are ignored and will
+ return -EINVAL.
+
+ Writing a new weight to a node directly via the nodeN interface
+ will also automatically switch the system to manual mode.
diff --git a/Documentation/ABI/testing/sysfs-kernel-mm-numa b/Documentation/ABI/testing/sysfs-kernel-mm-numa
index 77e559d4ed80..90e375ff54cb 100644
--- a/Documentation/ABI/testing/sysfs-kernel-mm-numa
+++ b/Documentation/ABI/testing/sysfs-kernel-mm-numa
@@ -16,9 +16,13 @@ Description: Enable/disable demoting pages during reclaim
Allowing page migration during reclaim enables these
systems to migrate pages from fast tiers to slow tiers
when the fast tier is under pressure. This migration
- is performed before swap. It may move data to a NUMA
- node that does not fall into the cpuset of the
- allocating process which might be construed to violate
- the guarantees of cpusets. This should not be enabled
- on systems which need strict cpuset location
- guarantees.
+ is performed before swap if an eligible numa node is
+ present in cpuset.mems for the cgroup (or if cpuset v1
+ is being used). If cpusets.mems changes at runtime, it
+ may move data to a NUMA node that does not fall into the
+ cpuset of the new cpusets.mems, which might be construed
+ to violate the guarantees of cpusets. Shared memory,
+ such as libraries, owned by another cgroup may still be
+ demoted and result in memory use on a node not present
+ in cpusets.mem. This should not be enabled on systems
+ which need strict cpuset location guarantees.
diff --git a/Documentation/ABI/testing/sysfs-kernel-mm-transparent-hugepage b/Documentation/ABI/testing/sysfs-kernel-mm-transparent-hugepage
new file mode 100644
index 000000000000..7bfbb9cc2c11
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-kernel-mm-transparent-hugepage
@@ -0,0 +1,18 @@
+What: /sys/kernel/mm/transparent_hugepage/
+Date: April 2024
+Contact: Linux memory management mailing list <linux-mm@kvack.org>
+Description:
+ /sys/kernel/mm/transparent_hugepage/ contains a number of files and
+ subdirectories,
+
+ - defrag
+ - enabled
+ - hpage_pmd_size
+ - khugepaged
+ - shmem_enabled
+ - use_zero_page
+ - subdirectories of the form hugepages-<size>kB, where <size>
+ is the page size of the hugepages supported by the kernel/CPU
+ combination.
+
+ See Documentation/admin-guide/mm/transhuge.rst for details.
diff --git a/Documentation/ABI/testing/sysfs-kernel-rcu_stall_count b/Documentation/ABI/testing/sysfs-kernel-rcu_stall_count
new file mode 100644
index 000000000000..a4a97a7f4a4d
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-kernel-rcu_stall_count
@@ -0,0 +1,6 @@
+What: /sys/kernel/rcu_stall_count
+Date: May 2025
+KernelVersion: 6.16
+Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
+Description:
+ Shows how many times the system has detected an RCU stall since last boot.
diff --git a/Documentation/ABI/testing/sysfs-kernel-reboot b/Documentation/ABI/testing/sysfs-kernel-reboot
index 837330fb2511..52571fd5ddba 100644
--- a/Documentation/ABI/testing/sysfs-kernel-reboot
+++ b/Documentation/ABI/testing/sysfs-kernel-reboot
@@ -1,7 +1,7 @@
What: /sys/kernel/reboot
Date: November 2020
KernelVersion: 5.11
-Contact: Matteo Croce <mcroce@microsoft.com>
+Contact: Matteo Croce <teknoraver@meta.com>
Description: Interface to set the kernel reboot behavior, similarly to
what can be done via the reboot= cmdline option.
(see Documentation/admin-guide/kernel-parameters.txt)
@@ -9,24 +9,32 @@ Description: Interface to set the kernel reboot behavior, similarly to
What: /sys/kernel/reboot/mode
Date: November 2020
KernelVersion: 5.11
-Contact: Matteo Croce <mcroce@microsoft.com>
+Contact: Matteo Croce <teknoraver@meta.com>
Description: Reboot mode. Valid values are: cold warm hard soft gpio
What: /sys/kernel/reboot/type
Date: November 2020
KernelVersion: 5.11
-Contact: Matteo Croce <mcroce@microsoft.com>
+Contact: Matteo Croce <teknoraver@meta.com>
Description: Reboot type. Valid values are: bios acpi kbd triple efi pci
What: /sys/kernel/reboot/cpu
Date: November 2020
KernelVersion: 5.11
-Contact: Matteo Croce <mcroce@microsoft.com>
+Contact: Matteo Croce <teknoraver@meta.com>
Description: CPU number to use to reboot.
What: /sys/kernel/reboot/force
Date: November 2020
KernelVersion: 5.11
-Contact: Matteo Croce <mcroce@microsoft.com>
+Contact: Matteo Croce <teknoraver@meta.com>
Description: Don't wait for any other CPUs on reboot and
avoid anything that could hang.
+
+What: /sys/kernel/reboot/hw_protection
+Date: April 2025
+KernelVersion: 6.15
+Contact: Ahmad Fatoum <a.fatoum@pengutronix.de>
+Description: Hardware protection action taken on critical events like
+ overtemperature or imminent voltage loss.
+ Valid values are: reboot shutdown
diff --git a/Documentation/ABI/testing/sysfs-kernel-slab b/Documentation/ABI/testing/sysfs-kernel-slab
index cd5fb8fa3ddf..b26e4299f822 100644
--- a/Documentation/ABI/testing/sysfs-kernel-slab
+++ b/Documentation/ABI/testing/sysfs-kernel-slab
@@ -2,7 +2,7 @@ What: /sys/kernel/slab
Date: May 2007
KernelVersion: 2.6.22
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The /sys/kernel/slab directory contains a snapshot of the
internal state of the SLUB allocator for each cache. Certain
@@ -14,7 +14,7 @@ What: /sys/kernel/slab/<cache>/aliases
Date: May 2007
KernelVersion: 2.6.22
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The aliases file is read-only and specifies how many caches
have merged into this cache.
@@ -23,7 +23,7 @@ What: /sys/kernel/slab/<cache>/align
Date: May 2007
KernelVersion: 2.6.22
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The align file is read-only and specifies the cache's object
alignment in bytes.
@@ -32,18 +32,19 @@ What: /sys/kernel/slab/<cache>/alloc_calls
Date: May 2007
KernelVersion: 2.6.22
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The alloc_calls file is read-only and lists the kernel code
locations from which allocations for this cache were performed.
The alloc_calls file only contains information if debugging is
- enabled for that cache (see Documentation/mm/slub.rst).
+ enabled for that cache (see
+ Documentation/admin-guide/mm/slab.rst).
What: /sys/kernel/slab/<cache>/alloc_fastpath
Date: February 2008
KernelVersion: 2.6.25
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The alloc_fastpath file shows how many objects have been
allocated using the fast path. It can be written to clear the
@@ -54,7 +55,7 @@ What: /sys/kernel/slab/<cache>/alloc_from_partial
Date: February 2008
KernelVersion: 2.6.25
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The alloc_from_partial file shows how many times a cpu slab has
been full and it has been refilled by using a slab from the list
@@ -66,7 +67,7 @@ What: /sys/kernel/slab/<cache>/alloc_refill
Date: February 2008
KernelVersion: 2.6.25
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The alloc_refill file shows how many times the per-cpu freelist
was empty but there were objects available as the result of
@@ -77,7 +78,7 @@ What: /sys/kernel/slab/<cache>/alloc_slab
Date: February 2008
KernelVersion: 2.6.25
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The alloc_slab file is shows how many times a new slab had to
be allocated from the page allocator. It can be written to
@@ -88,7 +89,7 @@ What: /sys/kernel/slab/<cache>/alloc_slowpath
Date: February 2008
KernelVersion: 2.6.25
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The alloc_slowpath file shows how many objects have been
allocated using the slow path because of a refill or
@@ -100,7 +101,7 @@ What: /sys/kernel/slab/<cache>/cache_dma
Date: May 2007
KernelVersion: 2.6.22
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The cache_dma file is read-only and specifies whether objects
are from ZONE_DMA.
@@ -110,7 +111,7 @@ What: /sys/kernel/slab/<cache>/cpu_slabs
Date: May 2007
KernelVersion: 2.6.22
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The cpu_slabs file is read-only and displays how many cpu slabs
are active and their NUMA locality.
@@ -119,7 +120,7 @@ What: /sys/kernel/slab/<cache>/cpuslab_flush
Date: April 2009
KernelVersion: 2.6.31
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The file cpuslab_flush shows how many times a cache's cpu slabs
have been flushed as the result of destroying or shrinking a
@@ -132,7 +133,7 @@ What: /sys/kernel/slab/<cache>/ctor
Date: May 2007
KernelVersion: 2.6.22
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The ctor file is read-only and specifies the cache's object
constructor function, which is invoked for each object when a
@@ -142,7 +143,7 @@ What: /sys/kernel/slab/<cache>/deactivate_empty
Date: February 2008
KernelVersion: 2.6.25
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The deactivate_empty file shows how many times an empty cpu slab
was deactivated. It can be written to clear the current count.
@@ -152,7 +153,7 @@ What: /sys/kernel/slab/<cache>/deactivate_full
Date: February 2008
KernelVersion: 2.6.25
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The deactivate_full file shows how many times a full cpu slab
was deactivated. It can be written to clear the current count.
@@ -162,7 +163,7 @@ What: /sys/kernel/slab/<cache>/deactivate_remote_frees
Date: February 2008
KernelVersion: 2.6.25
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The deactivate_remote_frees file shows how many times a cpu slab
has been deactivated and contained free objects that were freed
@@ -173,7 +174,7 @@ What: /sys/kernel/slab/<cache>/deactivate_to_head
Date: February 2008
KernelVersion: 2.6.25
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The deactivate_to_head file shows how many times a partial cpu
slab was deactivated and added to the head of its node's partial
@@ -184,7 +185,7 @@ What: /sys/kernel/slab/<cache>/deactivate_to_tail
Date: February 2008
KernelVersion: 2.6.25
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The deactivate_to_tail file shows how many times a partial cpu
slab was deactivated and added to the tail of its node's partial
@@ -195,7 +196,7 @@ What: /sys/kernel/slab/<cache>/destroy_by_rcu
Date: May 2007
KernelVersion: 2.6.22
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The destroy_by_rcu file is read-only and specifies whether
slabs (not objects) are freed by rcu.
@@ -204,7 +205,7 @@ What: /sys/kernel/slab/<cache>/free_add_partial
Date: February 2008
KernelVersion: 2.6.25
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The free_add_partial file shows how many times an object has
been freed in a full slab so that it had to added to its node's
@@ -215,17 +216,17 @@ What: /sys/kernel/slab/<cache>/free_calls
Date: May 2007
KernelVersion: 2.6.22
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The free_calls file is read-only and lists the locations of
object frees if slab debugging is enabled (see
- Documentation/mm/slub.rst).
+ Documentation/admin-guide/mm/slab.rst).
What: /sys/kernel/slab/<cache>/free_fastpath
Date: February 2008
KernelVersion: 2.6.25
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The free_fastpath file shows how many objects have been freed
using the fast path because it was an object from the cpu slab.
@@ -236,7 +237,7 @@ What: /sys/kernel/slab/<cache>/free_frozen
Date: February 2008
KernelVersion: 2.6.25
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The free_frozen file shows how many objects have been freed to
a frozen slab (i.e. a remote cpu slab). It can be written to
@@ -247,7 +248,7 @@ What: /sys/kernel/slab/<cache>/free_remove_partial
Date: February 2008
KernelVersion: 2.6.25
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The free_remove_partial file shows how many times an object has
been freed to a now-empty slab so that it had to be removed from
@@ -259,7 +260,7 @@ What: /sys/kernel/slab/<cache>/free_slab
Date: February 2008
KernelVersion: 2.6.25
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The free_slab file shows how many times an empty slab has been
freed back to the page allocator. It can be written to clear
@@ -270,7 +271,7 @@ What: /sys/kernel/slab/<cache>/free_slowpath
Date: February 2008
KernelVersion: 2.6.25
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The free_slowpath file shows how many objects have been freed
using the slow path (i.e. to a full or partial slab). It can
@@ -281,7 +282,7 @@ What: /sys/kernel/slab/<cache>/hwcache_align
Date: May 2007
KernelVersion: 2.6.22
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The hwcache_align file is read-only and specifies whether
objects are aligned on cachelines.
@@ -301,7 +302,7 @@ What: /sys/kernel/slab/<cache>/object_size
Date: May 2007
KernelVersion: 2.6.22
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The object_size file is read-only and specifies the cache's
object size.
@@ -310,7 +311,7 @@ What: /sys/kernel/slab/<cache>/objects
Date: May 2007
KernelVersion: 2.6.22
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The objects file is read-only and displays how many objects are
active and from which nodes they are from.
@@ -319,7 +320,7 @@ What: /sys/kernel/slab/<cache>/objects_partial
Date: April 2008
KernelVersion: 2.6.26
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The objects_partial file is read-only and displays how many
objects are on partial slabs and from which nodes they are
@@ -329,7 +330,7 @@ What: /sys/kernel/slab/<cache>/objs_per_slab
Date: May 2007
KernelVersion: 2.6.22
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The file objs_per_slab is read-only and specifies how many
objects may be allocated from a single slab of the order
@@ -339,7 +340,7 @@ What: /sys/kernel/slab/<cache>/order
Date: May 2007
KernelVersion: 2.6.22
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The order file specifies the page order at which new slabs are
allocated. It is writable and can be changed to increase the
@@ -356,7 +357,7 @@ What: /sys/kernel/slab/<cache>/order_fallback
Date: April 2008
KernelVersion: 2.6.26
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The order_fallback file shows how many times an allocation of a
new slab has not been possible at the cache's order and instead
@@ -369,7 +370,7 @@ What: /sys/kernel/slab/<cache>/partial
Date: May 2007
KernelVersion: 2.6.22
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The partial file is read-only and displays how long many
partial slabs there are and how long each node's list is.
@@ -378,7 +379,7 @@ What: /sys/kernel/slab/<cache>/poison
Date: May 2007
KernelVersion: 2.6.22
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The poison file specifies whether objects should be poisoned
when a new slab is allocated.
@@ -387,7 +388,7 @@ What: /sys/kernel/slab/<cache>/reclaim_account
Date: May 2007
KernelVersion: 2.6.22
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The reclaim_account file specifies whether the cache's objects
are reclaimable (and grouped by their mobility).
@@ -396,7 +397,7 @@ What: /sys/kernel/slab/<cache>/red_zone
Date: May 2007
KernelVersion: 2.6.22
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The red_zone file specifies whether the cache's objects are red
zoned.
@@ -405,7 +406,7 @@ What: /sys/kernel/slab/<cache>/remote_node_defrag_ratio
Date: January 2008
KernelVersion: 2.6.25
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The file remote_node_defrag_ratio specifies the percentage of
times SLUB will attempt to refill the cpu slab with a partial
@@ -419,7 +420,7 @@ What: /sys/kernel/slab/<cache>/sanity_checks
Date: May 2007
KernelVersion: 2.6.22
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The sanity_checks file specifies whether expensive checks
should be performed on free and, at minimum, enables double free
@@ -430,7 +431,7 @@ What: /sys/kernel/slab/<cache>/shrink
Date: May 2007
KernelVersion: 2.6.22
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The shrink file is used to reclaim unused slab cache
memory from a cache. Empty per-cpu or partial slabs
@@ -446,7 +447,7 @@ What: /sys/kernel/slab/<cache>/slab_size
Date: May 2007
KernelVersion: 2.6.22
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The slab_size file is read-only and specifies the object size
with metadata (debugging information and alignment) in bytes.
@@ -455,7 +456,7 @@ What: /sys/kernel/slab/<cache>/slabs
Date: May 2007
KernelVersion: 2.6.22
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The slabs file is read-only and displays how long many slabs
there are (both cpu and partial) and from which nodes they are
@@ -465,7 +466,7 @@ What: /sys/kernel/slab/<cache>/store_user
Date: May 2007
KernelVersion: 2.6.22
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The store_user file specifies whether the location of
allocation or free should be tracked for a cache.
@@ -474,7 +475,7 @@ What: /sys/kernel/slab/<cache>/total_objects
Date: April 2008
KernelVersion: 2.6.26
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The total_objects file is read-only and displays how many total
objects a cache has and from which nodes they are from.
@@ -483,7 +484,7 @@ What: /sys/kernel/slab/<cache>/trace
Date: May 2007
KernelVersion: 2.6.22
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
The trace file specifies whether object allocations and frees
should be traced.
@@ -492,7 +493,7 @@ What: /sys/kernel/slab/<cache>/validate
Date: May 2007
KernelVersion: 2.6.22
Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
- Christoph Lameter <cl@linux-foundation.org>
+ Christoph Lameter <cl@gentwo.org>
Description:
Writing to the validate file causes SLUB to traverse all of its
cache's objects and check the validity of metadata.
@@ -506,14 +507,14 @@ Description:
What: /sys/kernel/slab/<cache>/slabs_cpu_partial
Date: Aug 2011
-Contact: Christoph Lameter <cl@linux.com>
+Contact: Christoph Lameter <cl@gentwo.org>
Description:
This read-only file shows the number of partialli allocated
frozen slabs.
What: /sys/kernel/slab/<cache>/cpu_partial
Date: Aug 2011
-Contact: Christoph Lameter <cl@linux.com>
+Contact: Christoph Lameter <cl@gentwo.org>
Description:
This read-only file shows the number of per cpu partial
pages to keep around.
diff --git a/Documentation/ABI/testing/sysfs-kernel-softlockup_count b/Documentation/ABI/testing/sysfs-kernel-softlockup_count
new file mode 100644
index 000000000000..337ff5531b5f
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-kernel-softlockup_count
@@ -0,0 +1,7 @@
+What: /sys/kernel/softlockup_count
+Date: May 2025
+KernelVersion: 6.16
+Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
+Description:
+ Shows how many times the system has detected a soft lockup since last boot.
+ Available only if CONFIG_SOFTLOCKUP_DETECTOR is enabled.
diff --git a/Documentation/ABI/testing/sysfs-module b/Documentation/ABI/testing/sysfs-module
index 62addab47d0c..6bc9af6229f0 100644
--- a/Documentation/ABI/testing/sysfs-module
+++ b/Documentation/ABI/testing/sysfs-module
@@ -59,6 +59,8 @@ Description: Module taint flags:
F force-loaded module
C staging driver module
E unsigned module
+ K livepatch module
+ N in-kernel test module
== =====================
What: /sys/module/grant_table/parameters/free_per_iteration
diff --git a/Documentation/ABI/testing/sysfs-nvmem-cells b/Documentation/ABI/testing/sysfs-nvmem-cells
index 7af70adf3690..c7c9444f92a8 100644
--- a/Documentation/ABI/testing/sysfs-nvmem-cells
+++ b/Documentation/ABI/testing/sysfs-nvmem-cells
@@ -4,18 +4,18 @@ KernelVersion: 6.5
Contact: Miquel Raynal <miquel.raynal@bootlin.com>
Description:
The "cells" folder contains one file per cell exposed by the
- NVMEM device. The name of the file is: <name>@<where>, with
- <name> being the cell name and <where> its location in the NVMEM
- device, in hexadecimal (without the '0x' prefix, to mimic device
- tree node names). The length of the file is the size of the cell
- (when known). The content of the file is the binary content of
- the cell (may sometimes be ASCII, likely without trailing
- character).
+ NVMEM device. The name of the file is: "<name>@<byte>,<bit>",
+ with <name> being the cell name and <where> its location in
+ the NVMEM device, in hexadecimal bytes and bits (without the
+ '0x' prefix, to mimic device tree node names). The length of
+ the file is the size of the cell (when known). The content of
+ the file is the binary content of the cell (may sometimes be
+ ASCII, likely without trailing character).
Note: This file is only present if CONFIG_NVMEM_SYSFS
is enabled.
Example::
- hexdump -C /sys/bus/nvmem/devices/1-00563/cells/product-name@d
+ hexdump -C /sys/bus/nvmem/devices/1-00563/cells/product-name@d,0
00000000 54 4e 34 38 4d 2d 50 2d 44 4e |TN48M-P-DN|
0000000a
diff --git a/Documentation/ABI/testing/sysfs-platform-alienware-wmi b/Documentation/ABI/testing/sysfs-platform-alienware-wmi
new file mode 100644
index 000000000000..4877b3745f4e
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-platform-alienware-wmi
@@ -0,0 +1,14 @@
+What: /sys/class/hwmon/hwmonX/fanY_boost
+Date: March 2025
+KernelVersion: 6.15
+Contact: Kurt Borja <kuurtb@gmail.com>
+Description:
+ This file exposes fan boost control for Dell gaming laptops with
+ the AWCC WMI interface.
+
+ See Documentation/admin-guide/laptops/alienware-wmi.rst for
+ details.
+
+ Integer value in the range 0 to 255
+
+ RW
diff --git a/Documentation/ABI/testing/sysfs-platform-asus-wmi b/Documentation/ABI/testing/sysfs-platform-asus-wmi
index 8a7e25bde085..89acb6638df8 100644
--- a/Documentation/ABI/testing/sysfs-platform-asus-wmi
+++ b/Documentation/ABI/testing/sysfs-platform-asus-wmi
@@ -63,6 +63,7 @@ Date: Aug 2022
KernelVersion: 6.1
Contact: "Luke Jones" <luke@ljones.dev>
Description:
+ DEPRECATED, WILL BE REMOVED SOON: please use asus-armoury
Switch the GPU hardware MUX mode. Laptops with this feature can
can be toggled to boot with only the dGPU (discrete mode) or in
standard Optimus/Hybrid mode. On switch a reboot is required:
@@ -75,6 +76,7 @@ Date: Aug 2022
KernelVersion: 5.17
Contact: "Luke Jones" <luke@ljones.dev>
Description:
+ DEPRECATED, WILL BE REMOVED SOON: please use asus-armoury
Disable discrete GPU:
* 0 - Enable dGPU,
* 1 - Disable dGPU
@@ -84,6 +86,7 @@ Date: Aug 2022
KernelVersion: 5.17
Contact: "Luke Jones" <luke@ljones.dev>
Description:
+ DEPRECATED, WILL BE REMOVED SOON: please use asus-armoury
Enable the external GPU paired with ROG X-Flow laptops.
Toggling this setting will also trigger ACPI to disable the dGPU:
@@ -95,6 +98,7 @@ Date: Aug 2022
KernelVersion: 5.17
Contact: "Luke Jones" <luke@ljones.dev>
Description:
+ DEPRECATED, WILL BE REMOVED SOON: please use asus-armoury
Enable an LCD response-time boost to reduce or remove ghosting:
* 0 - Disable,
* 1 - Enable
@@ -104,6 +108,7 @@ Date: Jun 2023
KernelVersion: 6.5
Contact: "Luke Jones" <luke@ljones.dev>
Description:
+ DEPRECATED, WILL BE REMOVED SOON: please use asus-armoury
Get the current charging mode being used:
* 1 - Barrel connected charger,
* 2 - USB-C charging
@@ -114,6 +119,7 @@ Date: Jun 2023
KernelVersion: 6.5
Contact: "Luke Jones" <luke@ljones.dev>
Description:
+ DEPRECATED, WILL BE REMOVED SOON: please use asus-armoury
Show if the egpu (XG Mobile) is correctly connected:
* 0 - False,
* 1 - True
@@ -123,15 +129,26 @@ Date: Jun 2023
KernelVersion: 6.5
Contact: "Luke Jones" <luke@ljones.dev>
Description:
+ DEPRECATED, WILL BE REMOVED SOON: please use asus-armoury
Change the mini-LED mode:
* 0 - Single-zone,
* 1 - Multi-zone
+ * 2 - Multi-zone strong (available on newer generation mini-led)
+
+What: /sys/devices/platform/<platform>/available_mini_led_mode
+Date: Apr 2024
+KernelVersion: 6.10
+Contact: "Luke Jones" <luke@ljones.dev>
+Description:
+ DEPRECATED, WILL BE REMOVED SOON: please use asus-armoury
+ List the available mini-led modes.
What: /sys/devices/platform/<platform>/ppt_pl1_spl
Date: Jun 2023
KernelVersion: 6.5
Contact: "Luke Jones" <luke@ljones.dev>
Description:
+ DEPRECATED, WILL BE REMOVED SOON: please use asus-armoury
Set the Package Power Target total of CPU: PL1 on Intel, SPL on AMD.
Shown on Intel+Nvidia or AMD+Nvidia based systems:
@@ -142,6 +159,7 @@ Date: Jun 2023
KernelVersion: 6.5
Contact: "Luke Jones" <luke@ljones.dev>
Description:
+ DEPRECATED, WILL BE REMOVED SOON: please use asus-armoury
Set the Slow Package Power Tracking Limit of CPU: PL2 on Intel, SPPT,
on AMD. Shown on Intel+Nvidia or AMD+Nvidia based systems:
@@ -152,6 +170,7 @@ Date: Jun 2023
KernelVersion: 6.5
Contact: "Luke Jones" <luke@ljones.dev>
Description:
+ DEPRECATED, WILL BE REMOVED SOON: please use asus-armoury
Set the Fast Package Power Tracking Limit of CPU. AMD+Nvidia only:
* min=5, max=250
@@ -160,6 +179,7 @@ Date: Jun 2023
KernelVersion: 6.5
Contact: "Luke Jones" <luke@ljones.dev>
Description:
+ DEPRECATED, WILL BE REMOVED SOON: please use asus-armoury
Set the APU SPPT limit. Shown on full AMD systems only:
* min=5, max=130
@@ -168,6 +188,7 @@ Date: Jun 2023
KernelVersion: 6.5
Contact: "Luke Jones" <luke@ljones.dev>
Description:
+ DEPRECATED, WILL BE REMOVED SOON: please use asus-armoury
Set the platform SPPT limit. Shown on full AMD systems only:
* min=5, max=130
@@ -176,6 +197,7 @@ Date: Jun 2023
KernelVersion: 6.5
Contact: "Luke Jones" <luke@ljones.dev>
Description:
+ DEPRECATED, WILL BE REMOVED SOON: please use asus-armoury
Set the dynamic boost limit of the Nvidia dGPU:
* min=5, max=25
@@ -184,5 +206,26 @@ Date: Jun 2023
KernelVersion: 6.5
Contact: "Luke Jones" <luke@ljones.dev>
Description:
+ DEPRECATED, WILL BE REMOVED SOON: please use asus-armoury
Set the target temperature limit of the Nvidia dGPU:
* min=75, max=87
+
+What: /sys/devices/platform/<platform>/boot_sound
+Date: Apr 2024
+KernelVersion: 6.10
+Contact: "Luke Jones" <luke@ljones.dev>
+Description:
+ DEPRECATED, WILL BE REMOVED SOON: please use asus-armoury
+ Set if the BIOS POST sound is played on boot.
+ * 0 - False,
+ * 1 - True
+
+What: /sys/devices/platform/<platform>/mcu_powersave
+Date: Apr 2024
+KernelVersion: 6.10
+Contact: "Luke Jones" <luke@ljones.dev>
+Description:
+ DEPRECATED, WILL BE REMOVED SOON: please use asus-armoury
+ Set if the MCU can go in to low-power mode on system sleep
+ * 0 - False,
+ * 1 - True
diff --git a/Documentation/ABI/testing/sysfs-platform-ayaneo-ec b/Documentation/ABI/testing/sysfs-platform-ayaneo-ec
new file mode 100644
index 000000000000..4cffbf5fc7ca
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-platform-ayaneo-ec
@@ -0,0 +1,19 @@
+What: /sys/devices/platform/ayaneo-ec/controller_power
+Date: Nov 2025
+KernelVersion: 6.19
+Contact: "Antheas Kapenekakis" <lkml@antheas.dev>
+Description:
+ Current controller power state. Allows turning on and off
+ the controller power (e.g. for power savings). Write 1 to
+ turn on, 0 to turn off. File is readable and writable.
+
+What: /sys/devices/platform/ayaneo-ec/controller_modules
+Date: Nov 2025
+KernelVersion: 6.19
+Contact: "Antheas Kapenekakis" <lkml@antheas.dev>
+Description:
+ Shows which controller modules are currently connected to
+ the device. Possible values are "left", "right" and "both".
+ File is read-only. The Windows software for this device
+ will only set controller power to 1 if both module sides
+ are connected (i.e. this file returns "both").
diff --git a/Documentation/ABI/testing/sysfs-platform-dell-privacy-wmi b/Documentation/ABI/testing/sysfs-platform-dell-privacy-wmi
index 1f1f274a6979..b4da7b2ea0ca 100644
--- a/Documentation/ABI/testing/sysfs-platform-dell-privacy-wmi
+++ b/Documentation/ABI/testing/sysfs-platform-dell-privacy-wmi
@@ -1,4 +1,4 @@
-What: /sys/bus/wmi/devices/6932965F-1671-4CEB-B988-D3AB0A901919/dell_privacy_supported_type
+What: /sys/bus/wmi/devices/6932965F-1671-4CEB-B988-D3AB0A901919[-X]/dell_privacy_supported_type
Date: Apr 2021
KernelVersion: 5.13
Contact: "<perry.yuan@dell.com>"
@@ -29,12 +29,12 @@ Description:
For example to check which privacy devices are supported::
- # cat /sys/bus/wmi/drivers/dell-privacy/6932965F-1671-4CEB-B988-D3AB0A901919/dell_privacy_supported_type
+ # cat /sys/bus/wmi/drivers/dell-privacy/6932965F-1671-4CEB-B988-D3AB0A901919*/dell_privacy_supported_type
[Microphone Mute] [supported]
[Camera Shutter] [supported]
[ePrivacy Screen] [unsupported]
-What: /sys/bus/wmi/devices/6932965F-1671-4CEB-B988-D3AB0A901919/dell_privacy_current_state
+What: /sys/bus/wmi/devices/6932965F-1671-4CEB-B988-D3AB0A901919[-X]/dell_privacy_current_state
Date: Apr 2021
KernelVersion: 5.13
Contact: "<perry.yuan@dell.com>"
@@ -66,6 +66,6 @@ Description:
For example to check all supported current privacy device states::
- # cat /sys/bus/wmi/drivers/dell-privacy/6932965F-1671-4CEB-B988-D3AB0A901919/dell_privacy_current_state
+ # cat /sys/bus/wmi/drivers/dell-privacy/6932965F-1671-4CEB-B988-D3AB0A901919*/dell_privacy_current_state
[Microphone] [unmuted]
[Camera Shutter] [unmuted]
diff --git a/Documentation/ABI/testing/sysfs-platform-ideapad-laptop b/Documentation/ABI/testing/sysfs-platform-ideapad-laptop
index 4989ab266682..5ec0dee9e707 100644
--- a/Documentation/ABI/testing/sysfs-platform-ideapad-laptop
+++ b/Documentation/ABI/testing/sysfs-platform-ideapad-laptop
@@ -27,15 +27,6 @@ Description:
* 1 -> Switched On
* 0 -> Switched Off
-What: /sys/bus/platform/devices/VPC2004:*/conservation_mode
-Date: Aug 2017
-KernelVersion: 4.14
-Contact: platform-driver-x86@vger.kernel.org
-Description:
- Controls whether the conservation mode is enabled or not.
- This feature limits the maximum battery charge percentage to
- around 50-60% in order to prolong the lifetime of the battery.
-
What: /sys/bus/platform/devices/VPC2004:*/fn_lock
Date: May 2018
KernelVersion: 4.18
diff --git a/Documentation/ABI/testing/sysfs-platform-intel-wmi-sbl-fw-update b/Documentation/ABI/testing/sysfs-platform-intel-wmi-sbl-fw-update
index 02ae1e9bbfc8..7ffd1579b8f7 100644
--- a/Documentation/ABI/testing/sysfs-platform-intel-wmi-sbl-fw-update
+++ b/Documentation/ABI/testing/sysfs-platform-intel-wmi-sbl-fw-update
@@ -1,4 +1,4 @@
-What: /sys/bus/wmi/devices/44FADEB1-B204-40F2-8581-394BBDC1B651/firmware_update_request
+What: /sys/bus/wmi/devices/44FADEB1-B204-40F2-8581-394BBDC1B651[-X]/firmware_update_request
Date: April 2020
KernelVersion: 5.7
Contact: "Jithu Joseph" <jithu.joseph@intel.com>
diff --git a/Documentation/ABI/testing/sysfs-platform-intel-wmi-thunderbolt b/Documentation/ABI/testing/sysfs-platform-intel-wmi-thunderbolt
index fd3a7ec79760..10ef1282c9d2 100644
--- a/Documentation/ABI/testing/sysfs-platform-intel-wmi-thunderbolt
+++ b/Documentation/ABI/testing/sysfs-platform-intel-wmi-thunderbolt
@@ -1,4 +1,4 @@
-What: /sys/devices/platform/<platform>/force_power
+What: /sys/bus/wmi/devices/86CCFD48-205E-4A77-9C48-2021CBEDE341[-X]/force_power
Date: September 2017
KernelVersion: 4.15
Contact: "Mario Limonciello" <mario.limonciello@outlook.com>
diff --git a/Documentation/ABI/testing/sysfs-platform-mellanox-bootctl b/Documentation/ABI/testing/sysfs-platform-mellanox-bootctl
index 65ed3865da62..09f783fa0a53 100644
--- a/Documentation/ABI/testing/sysfs-platform-mellanox-bootctl
+++ b/Documentation/ABI/testing/sysfs-platform-mellanox-bootctl
@@ -150,3 +150,13 @@ Description:
The "mfg_lock" sysfs attribute is write-only.
A successful write to this attribute will latch the
board-level attributes into EEPROM, making them read-only.
+
+What: /sys/bus/platform/devices/MLNXBF04:00/rtc_battery
+Date: June 2025
+KernelVersion: 6.15
+Contact: "Xiangrong Li <xiangrongl@nvidia.com>"
+Description:
+ The "rtc_battery" sysfs attribute is read-only.
+ A successful read from this attribute returns the status of
+ the board's RTC battery. The RTC battery status register is
+ also cleared upon successful read operation.
diff --git a/Documentation/ABI/testing/sysfs-platform-mellanox-pmc b/Documentation/ABI/testing/sysfs-platform-mellanox-pmc
new file mode 100644
index 000000000000..29b3f9c58e00
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-platform-mellanox-pmc
@@ -0,0 +1,64 @@
+HID Driver Description
+MLNXBFD0 mlxbf-pmc Performance counters (BlueField-1)
+MLNXBFD1 mlxbf-pmc Performance counters (BlueField-2)
+MLNXBFD2 mlxbf-pmc Performance counters (BlueField-3)
+
+What: /sys/bus/platform/devices/<HID>/hwmon/hwmonX/<block>/event_list
+Date: Dec 2020
+KernelVersion: 5.10
+Contact: "Shravan Kumar Ramani <shravankr@nvidia.com>"
+Description:
+ List of events supported by the counters in the specific block.
+ It is used to extract the event number or ID associated with
+ each event.
+
+What: /sys/bus/platform/devices/<HID>/hwmon/hwmonX/<block>/event<N>
+Date: Dec 2020
+KernelVersion: 5.10
+Contact: "Shravan Kumar Ramani <shravankr@nvidia.com>"
+Description:
+ Event monitored by corresponding counter. This is used to
+ program or read back the event that should be or is currently
+ being monitored by counter<N>.
+
+What: /sys/bus/platform/devices/<HID>/hwmon/hwmonX/<block>/counter<N>
+Date: Dec 2020
+KernelVersion: 5.10
+Contact: "Shravan Kumar Ramani <shravankr@nvidia.com>"
+Description:
+ Counter value of the event being monitored. This is used to
+ read the counter value of the event which was programmed using
+ event<N>. This is also used to clear or reset the counter value
+ by writing 0 to the counter sysfs.
+
+What: /sys/bus/platform/devices/<HID>/hwmon/hwmonX/<block>/enable
+Date: Dec 2020
+KernelVersion: 5.10
+Contact: "Shravan Kumar Ramani <shravankr@nvidia.com>"
+Description:
+ Start or stop counters. This is used to start the counters
+ for monitoring the programmed events and also to stop the
+ counters after the desired duration. Writing value 1 will
+ start all the counters in the block, and writing 0 will
+ stop all the counters together.
+
+What: /sys/bus/platform/devices/<HID>/hwmon/hwmonX/<block>/<reg>
+Date: Dec 2020
+KernelVersion: 5.10
+Contact: "Shravan Kumar Ramani <shravankr@nvidia.com>"
+Description:
+ Value of register. This is used to read or reset the registers
+ where various performance statistics are counted for each block.
+ Writing 0 to the sysfs will clear the counter, writing any other
+ value is not allowed.
+
+What: /sys/bus/platform/devices/<HID>/hwmon/hwmonX/<block>/count_clock
+Date: Mar 2025
+KernelVersion: 6.14
+Contact: "Shravan Kumar Ramani <shravankr@nvidia.com>"
+Description:
+ Use a counter for counting cycles. This is used to repurpose/dedicate
+ any of the counters in the block to counting cycles. Each counter is
+ represented by a bit (bit 0 for counter0, bit1 for counter1 and so on)
+ and setting the corresponding bit will reserve that specific counter
+ for counting cycles and override the event<N> setting.
diff --git a/Documentation/ABI/testing/sysfs-platform-oxp b/Documentation/ABI/testing/sysfs-platform-oxp
new file mode 100644
index 000000000000..b3f39fc21dfa
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-platform-oxp
@@ -0,0 +1,25 @@
+What: /sys/devices/platform/<platform>/tt_toggle
+Date: Jun 2023
+KernelVersion: 6.5
+Contact: "Antheas Kapenekakis" <lkml@antheas.dev>
+Description:
+ Takeover TDP controls from the device. OneXPlayer devices have a
+ turbo button that can be used to switch between two TDP modes
+ (usually 15W and 25W). By setting this attribute to 1, this
+ functionality is disabled, handing TDP control over to (Windows)
+ userspace software and the Turbo button turns into a keyboard
+ shortcut over the AT keyboard of the device. In addition,
+ using this setting is a prerequisite for PWM control for most
+ newer models (otherwise it NOOPs).
+
+What: /sys/devices/platform/<platform>/tt_led
+Date: April 2025
+KernelVersion: 6.16
+Contact: "Antheas Kapenekakis" <lkml@antheas.dev>
+Description:
+ Some OneXPlayer devices (e.g., X1 series) feature a little LED
+ nested in the Turbo button. This LED is illuminated when the
+ device is in the higher TDP mode (e.g., 25W). Once tt_toggle
+ is engaged, this LED is left dangling to its last state. This
+ attribute allows userspace to control the LED state manually
+ (either with 1 or 0). Only a subset of devices contain this LED.
diff --git a/Documentation/ABI/testing/sysfs-platform_profile b/Documentation/ABI/testing/sysfs-platform_profile
index baf1d125f9f8..125324ab53a9 100644
--- a/Documentation/ABI/testing/sysfs-platform_profile
+++ b/Documentation/ABI/testing/sysfs-platform_profile
@@ -33,3 +33,8 @@ Description: Reading this file gives the current selected profile for this
source such as e.g. a hotkey triggered profile change handled
either directly by the embedded-controller or fully handled
inside the kernel.
+
+ This file may also emit the string 'custom' to indicate
+ that multiple platform profiles drivers are in use but
+ have different values. This string can not be written to
+ this interface and is solely for informational purposes.
diff --git a/Documentation/ABI/testing/sysfs-power b/Documentation/ABI/testing/sysfs-power
index a3942b1036e2..d38da077905a 100644
--- a/Documentation/ABI/testing/sysfs-power
+++ b/Documentation/ABI/testing/sysfs-power
@@ -1,6 +1,6 @@
What: /sys/power/
Date: August 2006
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
The /sys/power directory will contain files that will
provide a unified interface to the power management
@@ -8,7 +8,7 @@ Description:
What: /sys/power/state
Date: November 2016
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
The /sys/power/state file controls system sleep states.
Reading from this file returns the available sleep state
@@ -23,7 +23,7 @@ Description:
What: /sys/power/mem_sleep
Date: November 2016
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
The /sys/power/mem_sleep file controls the operating mode of
system suspend. Reading from it returns the available modes
@@ -41,7 +41,7 @@ Description:
What: /sys/power/disk
Date: September 2006
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
The /sys/power/disk file controls the operating mode of the
suspend-to-disk mechanism. Reading from this file returns
@@ -90,7 +90,7 @@ Description:
What: /sys/power/image_size
Date: August 2006
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
The /sys/power/image_size file controls the size of the image
created by the suspend-to-disk mechanism. It can be written a
@@ -107,7 +107,7 @@ Description:
What: /sys/power/pm_trace
Date: August 2006
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
The /sys/power/pm_trace file controls the code which saves the
last PM event point in the RTC across reboots, so that you can
@@ -131,7 +131,7 @@ Description:
CAUTION: Using it will cause your machine's real-time (CMOS)
clock to be set to a random invalid time after a resume.
-What; /sys/power/pm_trace_dev_match
+What: /sys/power/pm_trace_dev_match
Date: October 2010
Contact: James Hogan <jhogan@kernel.org>
Description:
@@ -156,7 +156,7 @@ Description:
What: /sys/power/pm_async
Date: January 2009
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
The /sys/power/pm_async file controls the switch allowing the
user space to enable or disable asynchronous suspend and resume
@@ -169,7 +169,7 @@ Description:
What: /sys/power/wakeup_count
Date: July 2010
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
The /sys/power/wakeup_count file allows user space to put the
system into a sleep state while taking into account the
@@ -184,7 +184,7 @@ Description:
What: /sys/power/reserved_size
Date: May 2011
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
The /sys/power/reserved_size file allows user space to control
the amount of memory reserved for allocations made by device
@@ -198,7 +198,7 @@ Description:
What: /sys/power/autosleep
Date: April 2012
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
The /sys/power/autosleep file can be written one of the strings
returned by reads from /sys/power/state. If that happens, a
@@ -215,7 +215,7 @@ Description:
What: /sys/power/wake_lock
Date: February 2012
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
The /sys/power/wake_lock file allows user space to create
wakeup source objects and activate them on demand (if one of
@@ -242,7 +242,7 @@ Description:
What: /sys/power/wake_unlock
Date: February 2012
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
The /sys/power/wake_unlock file allows user space to deactivate
wakeup sources created with the help of /sys/power/wake_lock.
@@ -283,7 +283,7 @@ Description:
What: /sys/power/pm_debug_messages
Date: July 2017
-Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Contact: Rafael J. Wysocki <rafael@kernel.org>
Description:
The /sys/power/pm_debug_messages file controls the printing
of debug messages from the system suspend/hiberbation
@@ -454,3 +454,19 @@ Description:
disables it. Reads from the file return the current value.
The default is "1" if the build-time "SUSPEND_SKIP_SYNC" config
flag is unset, or "0" otherwise.
+
+What: /sys/power/hibernate_compression_threads
+Date: October 2025
+Contact: <luoxueqin@kylinos.cn>
+Description:
+ Controls the number of threads used for compression
+ and decompression of hibernation images.
+
+ The value can be adjusted at runtime to balance
+ performance and CPU utilization.
+
+ The change takes effect on the next hibernation or
+ resume operation.
+
+ Minimum value: 1
+ Default value: 3
diff --git a/Documentation/ABI/testing/sysfs-pps-gen b/Documentation/ABI/testing/sysfs-pps-gen
new file mode 100644
index 000000000000..2519207b88fd
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-pps-gen
@@ -0,0 +1,43 @@
+What: /sys/class/pps-gen/
+Date: February 2025
+KernelVersion: 6.13
+Contact: Rodolfo Giometti <giometti@enneenne.com>
+Description:
+ The /sys/class/pps-gen/ directory contains files and
+ directories that provide a unified interface to the PPS
+ generators.
+
+What: /sys/class/pps-gen/pps-genX/
+Date: February 2025
+KernelVersion: 6.13
+Contact: Rodolfo Giometti <giometti@enneenne.com>
+Description:
+ The /sys/class/pps-gen/pps-genX/ directory is related to X-th
+ PPS generator in the system. Each directory contain files to
+ manage and control its PPS generator.
+
+What: /sys/class/pps-gen/pps-genX/enable
+Date: February 2025
+KernelVersion: 6.13
+Contact: Rodolfo Giometti <giometti@enneenne.com>
+Description:
+ This write-only file enables or disables generation of the
+ PPS signal.
+
+What: /sys/class/pps-gen/pps-genX/system
+Date: February 2025
+KernelVersion: 6.13
+Contact: Rodolfo Giometti <giometti@enneenne.com>
+Description:
+ This read-only file returns "1" if the generator takes the
+ timing from the system clock, while it returns "0" if not
+ (i.e. from a peripheral device clock).
+
+What: /sys/class/pps-gen/pps-genX/time
+Date: February 2025
+KernelVersion: 6.13
+Contact: Rodolfo Giometti <giometti@enneenne.com>
+Description:
+ This read-only file contains the current time stored into the
+ generator clock as two integers representing the current time
+ seconds and nanoseconds.
diff --git a/Documentation/ABI/testing/sysfs-pps-gen-tio b/Documentation/ABI/testing/sysfs-pps-gen-tio
new file mode 100644
index 000000000000..3c34ff17a335
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-pps-gen-tio
@@ -0,0 +1,6 @@
+What: /sys/class/pps-gen/pps-genx/enable
+Date: April 2025
+KernelVersion: 6.15
+Contact: Subramanian Mohan<subramanian.mohan@intel.com>
+Description:
+ Enable or disable PPS TIO generator output.
diff --git a/Documentation/ABI/testing/sysfs-secvar b/Documentation/ABI/testing/sysfs-secvar
index 857cf12b0904..1016967a730f 100644
--- a/Documentation/ABI/testing/sysfs-secvar
+++ b/Documentation/ABI/testing/sysfs-secvar
@@ -22,9 +22,13 @@ Description: A string indicating which backend is in use by the firmware.
and is expected to be "ibm,edk2-compat-v1".
On pseries/PLPKS, this is generated by the kernel based on the
- version number in the SB_VERSION variable in the keystore, and
- has the form "ibm,plpks-sb-v<version>", or
- "ibm,plpks-sb-unknown" if there is no SB_VERSION variable.
+ version number in the SB_VERSION variable in the keystore. The
+ version numbering in the SB_VERSION variable starts from 1. The
+ format string takes the form "ibm,plpks-sb-v<version>" in the
+ case of dynamic key management mode. If the SB_VERSION variable
+ does not exist (or there is an error while reading it), it takes
+ the form "ibm,plpks-sb-v0", indicating that the key management
+ mode is static.
What: /sys/firmware/secvar/vars/<variable name>
Date: August 2019
@@ -34,6 +38,13 @@ Description: Each secure variable is represented as a directory named as
representation. The data and size can be determined by reading
their respective attribute files.
+ Only secvars relevant to the key management mode are exposed.
+ Only in the dynamic key management mode should the user have
+ access (read and write) to the secure boot secvars db, dbx,
+ grubdb, grubdbx, and sbat. These secvars are not consumed in the
+ static key management mode. PK, trustedcadb and moduledb are the
+ secvars common to both static and dynamic key management modes.
+
What: /sys/firmware/secvar/vars/<variable_name>/size
Date: August 2019
Contact: Nayna Jain <nayna@linux.ibm.com>
diff --git a/Documentation/ABI/testing/sysfs-timecard b/Documentation/ABI/testing/sysfs-timecard
index 220478156297..3ae41b7634ac 100644
--- a/Documentation/ABI/testing/sysfs-timecard
+++ b/Documentation/ABI/testing/sysfs-timecard
@@ -258,24 +258,29 @@ Description: (RW) When retrieving the PHC with the PTP SYS_OFFSET_EXTENDED
the estimated point where the FPGA latches the PHC time. This
value may be changed by writing an unsigned integer.
-What: /sys/class/timecard/ocpN/ttyGNSS
-What: /sys/class/timecard/ocpN/ttyGNSS2
-Date: September 2021
+What: /sys/class/timecard/ocpN/tty
+Date: August 2024
+Contact: Vadim Fedorenko <vadim.fedorenko@linux.dev>
+Description: (RO) Directory containing the sysfs nodes for TTY attributes
+
+What: /sys/class/timecard/ocpN/tty/ttyGNSS
+What: /sys/class/timecard/ocpN/tty/ttyGNSS2
+Date: August 2024
Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
-Description: These optional attributes link to the TTY serial ports
- associated with the GNSS devices.
+Description: (RO) These optional attributes contain names of the TTY serial
+ ports associated with the GNSS devices.
-What: /sys/class/timecard/ocpN/ttyMAC
-Date: September 2021
+What: /sys/class/timecard/ocpN/tty/ttyMAC
+Date: August 2024
Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
-Description: This optional attribute links to the TTY serial port
- associated with the Miniature Atomic Clock.
+Description: (RO) This optional attribute contains name of the TTY serial
+ port associated with the Miniature Atomic Clock.
-What: /sys/class/timecard/ocpN/ttyNMEA
-Date: September 2021
+What: /sys/class/timecard/ocpN/tty/ttyNMEA
+Date: August 2024
Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
-Description: This optional attribute links to the TTY serial port
- which outputs the PHC time in NMEA ZDA format.
+Description: (RO) This optional attribute contains name of the TTY serial
+ port which outputs the PHC time in NMEA ZDA format.
What: /sys/class/timecard/ocpN/utc_tai_offset
Date: September 2021
diff --git a/Documentation/Kconfig b/Documentation/Kconfig
index 3a0e7ac0c4e3..8b6c4b84b218 100644
--- a/Documentation/Kconfig
+++ b/Documentation/Kconfig
@@ -19,7 +19,7 @@ config WARN_ABI_ERRORS
described at Documentation/ABI/README. Yet, as they're manually
written, it would be possible that some of those files would
have errors that would break them for being parsed by
- scripts/get_abi.pl. Add a check to verify them.
+ tools/docs/get_abi.py. Add a check to verify them.
If unsure, select 'N'.
diff --git a/Documentation/Makefile b/Documentation/Makefile
index 3885bbe260eb..e96ac6dcac4f 100644
--- a/Documentation/Makefile
+++ b/Documentation/Makefile
@@ -5,14 +5,16 @@
# for cleaning
subdir- := devicetree/bindings
+ifneq ($(MAKECMDGOALS),cleandocs)
# Check for broken documentation file references
ifeq ($(CONFIG_WARN_MISSING_DOCUMENTS),y)
-$(shell $(srctree)/scripts/documentation-file-ref-check --warn)
+$(shell $(srctree)/tools/docs/documentation-file-ref-check --warn)
endif
# Check for broken ABI files
ifeq ($(CONFIG_WARN_ABI_ERRORS),y)
-$(shell $(srctree)/scripts/get_abi.pl validate --dir $(srctree)/Documentation/ABI)
+$(shell $(srctree)/tools/docs/get_abi.py --dir $(srctree)/Documentation/ABI validate)
+endif
endif
# You can set these variables from the command line.
@@ -21,16 +23,21 @@ SPHINXOPTS =
SPHINXDIRS = .
DOCS_THEME =
DOCS_CSS =
-_SPHINXDIRS = $(sort $(patsubst $(srctree)/Documentation/%/index.rst,%,$(wildcard $(srctree)/Documentation/*/index.rst)))
-SPHINX_CONF = conf.py
+RUSTDOC =
PAPER =
BUILDDIR = $(obj)/output
PDFLATEX = xelatex
LATEXOPTS = -interaction=batchmode -no-shell-escape
-ifeq ($(findstring 1, $(KBUILD_VERBOSE)),)
-SPHINXOPTS += "-q"
-endif
+PYTHONPYCACHEPREFIX ?= $(abspath $(BUILDDIR)/__pycache__)
+
+# Wrapper for sphinx-build
+
+BUILD_WRAPPER = $(srctree)/tools/docs/sphinx-build-wrapper
+
+# For denylisting "variable font" files
+# Can be overridden by setting as an env variable
+FONTS_CONF_DENY_VF ?= $(HOME)/deny-vf
# User-friendly check for sphinx-build
HAVE_SPHINX := $(shell if which $(SPHINXBUILD) >/dev/null 2>&1; then echo 1; else echo 0; fi)
@@ -40,150 +47,46 @@ ifeq ($(HAVE_SPHINX),0)
.DEFAULT:
$(warning The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed and in PATH, or set the SPHINXBUILD make variable to point to the full path of the '$(SPHINXBUILD)' executable.)
@echo
- @$(srctree)/scripts/sphinx-pre-install
+ @$(srctree)/tools/docs/sphinx-pre-install
@echo " SKIP Sphinx $@ target."
else # HAVE_SPHINX
-# User-friendly check for pdflatex and latexmk
-HAVE_PDFLATEX := $(shell if which $(PDFLATEX) >/dev/null 2>&1; then echo 1; else echo 0; fi)
-HAVE_LATEXMK := $(shell if which latexmk >/dev/null 2>&1; then echo 1; else echo 0; fi)
-
-ifeq ($(HAVE_LATEXMK),1)
- PDFLATEX := latexmk -$(PDFLATEX)
-endif #HAVE_LATEXMK
-
-# Internal variables.
-PAPEROPT_a4 = -D latex_paper_size=a4
-PAPEROPT_letter = -D latex_paper_size=letter
-KERNELDOC = $(srctree)/scripts/kernel-doc
-KERNELDOC_CONF = -D kerneldoc_srctree=$(srctree) -D kerneldoc_bin=$(KERNELDOC)
-ALLSPHINXOPTS = $(KERNELDOC_CONF) $(PAPEROPT_$(PAPER)) $(SPHINXOPTS)
-ifneq ($(wildcard $(srctree)/.config),)
-ifeq ($(CONFIG_RUST),y)
- # Let Sphinx know we will include rustdoc
- ALLSPHINXOPTS += -t rustdoc
-endif
-endif
-# the i18n builder cannot share the environment and doctrees with the others
-I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
-
-# commands; the 'cmd' from scripts/Kbuild.include is not *loopable*
-loop_cmd = $(echo-cmd) $(cmd_$(1)) || exit;
-
-# $2 sphinx builder e.g. "html"
-# $3 name of the build subfolder / e.g. "userspace-api/media", used as:
-# * dest folder relative to $(BUILDDIR) and
-# * cache folder relative to $(BUILDDIR)/.doctrees
-# $4 dest subfolder e.g. "man" for man pages at userspace-api/media/man
-# $5 reST source folder relative to $(srctree)/$(src),
-# e.g. "userspace-api/media" for the linux-tv book-set at ./Documentation/userspace-api/media
-
-quiet_cmd_sphinx = SPHINX $@ --> file://$(abspath $(BUILDDIR)/$3/$4)
- cmd_sphinx = $(MAKE) BUILDDIR=$(abspath $(BUILDDIR)) $(build)=Documentation/userspace-api/media $2 && \
- PYTHONDONTWRITEBYTECODE=1 \
- BUILDDIR=$(abspath $(BUILDDIR)) SPHINX_CONF=$(abspath $(srctree)/$(src)/$5/$(SPHINX_CONF)) \
- $(PYTHON3) $(srctree)/scripts/jobserver-exec \
- $(CONFIG_SHELL) $(srctree)/Documentation/sphinx/parallel-wrapper.sh \
- $(SPHINXBUILD) \
- -b $2 \
- -c $(abspath $(srctree)/$(src)) \
- -d $(abspath $(BUILDDIR)/.doctrees/$3) \
- -D version=$(KERNELVERSION) -D release=$(KERNELRELEASE) \
- $(ALLSPHINXOPTS) \
- $(abspath $(srctree)/$(src)/$5) \
- $(abspath $(BUILDDIR)/$3/$4) && \
- if [ "x$(DOCS_CSS)" != "x" ]; then \
- cp $(if $(patsubst /%,,$(DOCS_CSS)),$(abspath $(srctree)/$(DOCS_CSS)),$(DOCS_CSS)) $(BUILDDIR)/$3/_static/; \
- fi
-
-YNL_INDEX:=$(srctree)/Documentation/networking/netlink_spec/index.rst
-YNL_RST_DIR:=$(srctree)/Documentation/networking/netlink_spec
-YNL_YAML_DIR:=$(srctree)/Documentation/netlink/specs
-YNL_TOOL:=$(srctree)/tools/net/ynl/ynl-gen-rst.py
-
-YNL_RST_FILES_TMP := $(patsubst %.yaml,%.rst,$(wildcard $(YNL_YAML_DIR)/*.yaml))
-YNL_RST_FILES := $(patsubst $(YNL_YAML_DIR)%,$(YNL_RST_DIR)%, $(YNL_RST_FILES_TMP))
-
-$(YNL_INDEX): $(YNL_RST_FILES)
- $(Q)$(YNL_TOOL) -o $@ -x
-
-$(YNL_RST_DIR)/%.rst: $(YNL_YAML_DIR)/%.yaml $(YNL_TOOL)
- $(Q)$(YNL_TOOL) -i $< -o $@
-
-htmldocs: $(YNL_INDEX)
- @$(srctree)/scripts/sphinx-pre-install --version-check
- @+$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,html,$(var),,$(var)))
-
-# If Rust support is available and .config exists, add rustdoc generated contents.
-# If there are any, the errors from this make rustdoc will be displayed but
-# won't stop the execution of htmldocs
-
-ifneq ($(wildcard $(srctree)/.config),)
-ifeq ($(CONFIG_RUST),y)
- $(Q)$(MAKE) rustdoc || true
-endif
-endif
-
-texinfodocs:
- @$(srctree)/scripts/sphinx-pre-install --version-check
- @+$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,texinfo,$(var),texinfo,$(var)))
-
-# Note: the 'info' Make target is generated by sphinx itself when
-# running the texinfodocs target define above.
-infodocs: texinfodocs
- $(MAKE) -C $(BUILDDIR)/texinfo info
-
-linkcheckdocs:
- @$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,linkcheck,$(var),,$(var)))
-
-latexdocs:
- @$(srctree)/scripts/sphinx-pre-install --version-check
- @+$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,latex,$(var),latex,$(var)))
+# Common documentation targets
+htmldocs mandocs infodocs texinfodocs latexdocs epubdocs xmldocs pdfdocs linkcheckdocs:
+ $(Q)PYTHONPYCACHEPREFIX="$(PYTHONPYCACHEPREFIX)" \
+ $(srctree)/tools/docs/sphinx-pre-install --version-check
+ +$(Q)PYTHONPYCACHEPREFIX="$(PYTHONPYCACHEPREFIX)" \
+ $(PYTHON3) $(BUILD_WRAPPER) $@ \
+ --sphinxdirs="$(SPHINXDIRS)" $(RUSTDOC) \
+ --builddir="$(BUILDDIR)" --deny-vf=$(FONTS_CONF_DENY_VF) \
+ --theme=$(DOCS_THEME) --css=$(DOCS_CSS) --paper=$(PAPER)
-ifeq ($(HAVE_PDFLATEX),0)
-pdfdocs:
- $(warning The '$(PDFLATEX)' command was not found. Make sure you have it installed and in PATH to produce PDF output.)
- @echo " SKIP Sphinx $@ target."
-
-else # HAVE_PDFLATEX
-
-pdfdocs: latexdocs
- @$(srctree)/scripts/sphinx-pre-install --version-check
- $(foreach var,$(SPHINXDIRS), \
- $(MAKE) PDFLATEX="$(PDFLATEX)" LATEXOPTS="$(LATEXOPTS)" -C $(BUILDDIR)/$(var)/latex || exit; \
- mkdir -p $(BUILDDIR)/$(var)/pdf; \
- mv $(subst .tex,.pdf,$(wildcard $(BUILDDIR)/$(var)/latex/*.tex)) $(BUILDDIR)/$(var)/pdf/; \
- )
-
-endif # HAVE_PDFLATEX
-
-epubdocs:
- @$(srctree)/scripts/sphinx-pre-install --version-check
- @+$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,epub,$(var),epub,$(var)))
-
-xmldocs:
- @$(srctree)/scripts/sphinx-pre-install --version-check
- @+$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,xml,$(var),xml,$(var)))
-
-endif # HAVE_SPHINX
+endif
# The following targets are independent of HAVE_SPHINX, and the rules should
# work or silently pass without Sphinx.
+htmldocs-redirects: $(srctree)/Documentation/.renames.txt
+ @tools/docs/gen-redirects.py --output $(BUILDDIR) < $<
+
refcheckdocs:
- $(Q)cd $(srctree);scripts/documentation-file-ref-check
+ $(Q)cd $(srctree); tools/docs/documentation-file-ref-check
cleandocs:
$(Q)rm -rf $(BUILDDIR)
- $(Q)$(MAKE) BUILDDIR=$(abspath $(BUILDDIR)) $(build)=Documentation/userspace-api/media clean
+
+# Used only on help
+_SPHINXDIRS = $(shell printf "%s\n" $(patsubst $(srctree)/Documentation/%/index.rst,%,$(wildcard $(srctree)/Documentation/*/index.rst)) | sort -f)
dochelp:
@echo ' Linux kernel internal documentation in different formats from ReST:'
@echo ' htmldocs - HTML'
+ @echo ' htmldocs-redirects - generate HTML redirects for moved pages'
@echo ' texinfodocs - Texinfo'
@echo ' infodocs - Info'
+ @echo ' mandocs - Man pages'
@echo ' latexdocs - LaTeX'
@echo ' pdfdocs - PDF'
@echo ' epubdocs - EPUB'
@@ -195,13 +98,17 @@ dochelp:
@echo ' cleandocs - clean all generated files'
@echo
@echo ' make SPHINXDIRS="s1 s2" [target] Generate only docs of folder s1, s2'
- @echo ' valid values for SPHINXDIRS are: $(_SPHINXDIRS)'
- @echo
- @echo ' make SPHINX_CONF={conf-file} [target] use *additional* sphinx-build'
- @echo ' configuration. This is e.g. useful to build with nit-picking config.'
+ @echo ' top level values for SPHINXDIRS are: $(_SPHINXDIRS)'
+ @echo ' you may also use a subdirectory like SPHINXDIRS=userspace-api/media,'
+ @echo ' provided that there is an index.rst file at the subdirectory.'
@echo
@echo ' make DOCS_THEME={sphinx-theme} selects a different Sphinx theme.'
@echo
@echo ' make DOCS_CSS={a .css file} adds a DOCS_CSS override file for html/epub output.'
@echo
+ @echo ' make PAPER={a4|letter} Specifies the paper size used for LaTeX/PDF output.'
+ @echo
+ @echo ' make FONTS_CONF_DENY_VF={path} sets a deny list to block variable Noto CJK fonts'
+ @echo ' for PDF build. See tools/lib/python/kdoc/latex_fonts.py for more details'
+ @echo
@echo ' Default location for the generated documents is Documentation/output'
diff --git a/Documentation/PCI/controller/index.rst b/Documentation/PCI/controller/index.rst
new file mode 100644
index 000000000000..c2ce9ccdcfa0
--- /dev/null
+++ b/Documentation/PCI/controller/index.rst
@@ -0,0 +1,10 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===========================================
+PCI Native Host Bridge and Endpoint Drivers
+===========================================
+
+.. toctree::
+ :maxdepth: 2
+
+ rcar-pcie-firmware
diff --git a/Documentation/PCI/controller/rcar-pcie-firmware.rst b/Documentation/PCI/controller/rcar-pcie-firmware.rst
new file mode 100644
index 000000000000..67d3bf66e315
--- /dev/null
+++ b/Documentation/PCI/controller/rcar-pcie-firmware.rst
@@ -0,0 +1,32 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=================================================
+Firmware of PCIe controller for Renesas R-Car V4H
+=================================================
+
+Renesas R-Car V4H (r8a779g0) has a PCIe controller, requiring a specific
+firmware download during startup.
+
+However, Renesas currently cannot distribute the firmware free of charge.
+
+The firmware file "104_PCIe_fw_addr_data_ver1.05.txt" (note that the file name
+might be different between different datasheet revisions) can be found in the
+datasheet encoded as text, and as such, the file's content must be converted
+back to binary form. This can be achieved using the following example script:
+
+.. code-block:: sh
+
+ $ awk '/^\s*0x[0-9A-Fa-f]{4}\s+0x[0-9A-Fa-f]{4}/ { print substr($2,5,2) substr($2,3,2) }' \
+ 104_PCIe_fw_addr_data_ver1.05.txt | \
+ xxd -p -r > rcar_gen4_pcie.bin
+
+Once the text content has been converted into a binary firmware file, verify
+its checksum as follows:
+
+.. code-block:: sh
+
+ $ sha1sum rcar_gen4_pcie.bin
+ 1d0bd4b189b4eb009f5d564b1f93a79112994945 rcar_gen4_pcie.bin
+
+The resulting binary file called "rcar_gen4_pcie.bin" should be placed in the
+"/lib/firmware" directory before the driver runs.
diff --git a/Documentation/PCI/endpoint/index.rst b/Documentation/PCI/endpoint/index.rst
index 4d2333e7ae06..dd1f62e731c9 100644
--- a/Documentation/PCI/endpoint/index.rst
+++ b/Documentation/PCI/endpoint/index.rst
@@ -15,6 +15,7 @@ PCI Endpoint Framework
pci-ntb-howto
pci-vntb-function
pci-vntb-howto
+ pci-nvme-function
function/binding/pci-test
function/binding/pci-ntb
diff --git a/Documentation/PCI/endpoint/pci-endpoint-cfs.rst b/Documentation/PCI/endpoint/pci-endpoint-cfs.rst
index fb73345cfb8a..e69c2872ce3b 100644
--- a/Documentation/PCI/endpoint/pci-endpoint-cfs.rst
+++ b/Documentation/PCI/endpoint/pci-endpoint-cfs.rst
@@ -86,7 +86,7 @@ The <EPF Device> directory can have a list of symbolic links
be created by the user to represent the virtual functions that are bound to
the physical function. In the above directory structure <EPF Device 11> is a
physical function and <EPF Device 31> is a virtual function. An EPF device once
-it's linked to another EPF device, cannot be linked to a EPC device.
+it's linked to another EPF device, cannot be linked to an EPC device.
EPC Device
==========
@@ -108,7 +108,7 @@ entries corresponding to EPC device will be created by the EPC core.
The <EPC Device> directory will have a list of symbolic links to
<EPF Device>. These symbolic links should be created by the user to
represent the functions present in the endpoint device. Only <EPF Device>
-that represents a physical function can be linked to a EPC device.
+that represents a physical function can be linked to an EPC device.
The <EPC Device> directory will also have a *start* field. Once
"1" is written to this field, the endpoint device will be ready to
diff --git a/Documentation/PCI/endpoint/pci-endpoint.rst b/Documentation/PCI/endpoint/pci-endpoint.rst
index 4f5622a65555..0741c8cbd74e 100644
--- a/Documentation/PCI/endpoint/pci-endpoint.rst
+++ b/Documentation/PCI/endpoint/pci-endpoint.rst
@@ -57,11 +57,10 @@ by the PCI controller driver.
The PCI controller driver can then create a new EPC device by invoking
devm_pci_epc_create()/pci_epc_create().
-* devm_pci_epc_destroy()/pci_epc_destroy()
+* pci_epc_destroy()
- The PCI controller driver can destroy the EPC device created by either
- devm_pci_epc_create() or pci_epc_create() using devm_pci_epc_destroy() or
- pci_epc_destroy().
+ The PCI controller driver can destroy the EPC device created by
+ pci_epc_create() using pci_epc_destroy().
* pci_epc_linkup()
@@ -117,6 +116,35 @@ by the PCI endpoint function driver.
The PCI endpoint function driver should use pci_epc_mem_free_addr() to
free the memory space allocated using pci_epc_mem_alloc_addr().
+* pci_epc_map_addr()
+
+ A PCI endpoint function driver should use pci_epc_map_addr() to map to a RC
+ PCI address the CPU address of local memory obtained with
+ pci_epc_mem_alloc_addr().
+
+* pci_epc_unmap_addr()
+
+ A PCI endpoint function driver should use pci_epc_unmap_addr() to unmap the
+ CPU address of local memory mapped to a RC address with pci_epc_map_addr().
+
+* pci_epc_mem_map()
+
+ A PCI endpoint controller may impose constraints on the RC PCI addresses that
+ can be mapped. The function pci_epc_mem_map() allows endpoint function
+ drivers to allocate and map controller memory while handling such
+ constraints. This function will determine the size of the memory that must be
+ allocated with pci_epc_mem_alloc_addr() for successfully mapping a RC PCI
+ address range. This function will also indicate the size of the PCI address
+ range that was actually mapped, which can be less than the requested size, as
+ well as the offset into the allocated memory to use for accessing the mapped
+ RC PCI address range.
+
+* pci_epc_mem_unmap()
+
+ A PCI endpoint function driver can use pci_epc_mem_unmap() to unmap and free
+ controller memory that was allocated and mapped using pci_epc_mem_map().
+
+
Other EPC APIs
~~~~~~~~~~~~~~
@@ -169,11 +197,11 @@ by the PCI endpoint function driver.
* pci_epf_register_driver()
The PCI Endpoint Function driver should implement the following ops:
- * bind: ops to perform when a EPC device has been bound to EPF device
- * unbind: ops to perform when a binding has been lost between a EPC
+ * bind: ops to perform when an EPC device has been bound to EPF device
+ * unbind: ops to perform when a binding has been lost between an EPC
device and EPF device
- * linkup: ops to perform when the EPC device has established a
- connection with a host system
+ * add_cfs: optional ops to create function specific configfs
+ attributes
The PCI Function driver can then register the PCI EPF driver by using
pci_epf_register_driver().
@@ -223,7 +251,7 @@ pci-ep-cfs.c can be used as reference for using these APIs.
* pci_epf_bind()
pci_epf_bind() should be invoked when the EPF device has been bound to
- a EPC device.
+ an EPC device.
* pci_epf_unbind()
diff --git a/Documentation/PCI/endpoint/pci-nvme-function.rst b/Documentation/PCI/endpoint/pci-nvme-function.rst
new file mode 100644
index 000000000000..a68015317f7f
--- /dev/null
+++ b/Documentation/PCI/endpoint/pci-nvme-function.rst
@@ -0,0 +1,13 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=================
+PCI NVMe Function
+=================
+
+:Author: Damien Le Moal <dlemoal@kernel.org>
+
+The PCI NVMe endpoint function implements a PCI NVMe controller using the NVMe
+subsystem target core code. The driver for this function resides with the NVMe
+subsystem as drivers/nvme/target/pci-epf.c.
+
+See Documentation/nvme/nvme-pci-endpoint-target.rst for more details.
diff --git a/Documentation/PCI/endpoint/pci-test-howto.rst b/Documentation/PCI/endpoint/pci-test-howto.rst
index 909f770a07d6..dd66858cde46 100644
--- a/Documentation/PCI/endpoint/pci-test-howto.rst
+++ b/Documentation/PCI/endpoint/pci-test-howto.rst
@@ -81,8 +81,8 @@ device, the following commands can be used::
# echo 0x104c > functions/pci_epf_test/func1/vendorid
# echo 0xb500 > functions/pci_epf_test/func1/deviceid
- # echo 16 > functions/pci_epf_test/func1/msi_interrupts
- # echo 8 > functions/pci_epf_test/func1/msix_interrupts
+ # echo 32 > functions/pci_epf_test/func1/msi_interrupts
+ # echo 2048 > functions/pci_epf_test/func1/msix_interrupts
Binding pci-epf-test Device to EP Controller
@@ -123,113 +123,98 @@ above::
Using Endpoint Test function Device
-----------------------------------
-pcitest.sh added in tools/pci/ can be used to run all the default PCI endpoint
-tests. To compile this tool the following commands should be used::
+Kselftest added in tools/testing/selftests/pci_endpoint can be used to run all
+the default PCI endpoint tests. To build the Kselftest for PCI endpoint
+subsystem, the following commands should be used::
# cd <kernel-dir>
- # make -C tools/pci
+ # make -C tools/testing/selftests/pci_endpoint
or if you desire to compile and install in your system::
# cd <kernel-dir>
- # make -C tools/pci install
+ # make -C tools/testing/selftests/pci_endpoint INSTALL_PATH=/usr/bin install
-The tool and script will be located in <rootfs>/usr/bin/
+The test will be located in <rootfs>/usr/bin/
-
-pcitest.sh Output
-~~~~~~~~~~~~~~~~~
+Kselftest Output
+~~~~~~~~~~~~~~~~
::
- # pcitest.sh
- BAR tests
-
- BAR0: OKAY
- BAR1: OKAY
- BAR2: OKAY
- BAR3: OKAY
- BAR4: NOT OKAY
- BAR5: NOT OKAY
-
- Interrupt tests
-
- SET IRQ TYPE TO LEGACY: OKAY
- LEGACY IRQ: NOT OKAY
- SET IRQ TYPE TO MSI: OKAY
- MSI1: OKAY
- MSI2: OKAY
- MSI3: OKAY
- MSI4: OKAY
- MSI5: OKAY
- MSI6: OKAY
- MSI7: OKAY
- MSI8: OKAY
- MSI9: OKAY
- MSI10: OKAY
- MSI11: OKAY
- MSI12: OKAY
- MSI13: OKAY
- MSI14: OKAY
- MSI15: OKAY
- MSI16: OKAY
- MSI17: NOT OKAY
- MSI18: NOT OKAY
- MSI19: NOT OKAY
- MSI20: NOT OKAY
- MSI21: NOT OKAY
- MSI22: NOT OKAY
- MSI23: NOT OKAY
- MSI24: NOT OKAY
- MSI25: NOT OKAY
- MSI26: NOT OKAY
- MSI27: NOT OKAY
- MSI28: NOT OKAY
- MSI29: NOT OKAY
- MSI30: NOT OKAY
- MSI31: NOT OKAY
- MSI32: NOT OKAY
- SET IRQ TYPE TO MSI-X: OKAY
- MSI-X1: OKAY
- MSI-X2: OKAY
- MSI-X3: OKAY
- MSI-X4: OKAY
- MSI-X5: OKAY
- MSI-X6: OKAY
- MSI-X7: OKAY
- MSI-X8: OKAY
- MSI-X9: NOT OKAY
- MSI-X10: NOT OKAY
- MSI-X11: NOT OKAY
- MSI-X12: NOT OKAY
- MSI-X13: NOT OKAY
- MSI-X14: NOT OKAY
- MSI-X15: NOT OKAY
- MSI-X16: NOT OKAY
- [...]
- MSI-X2047: NOT OKAY
- MSI-X2048: NOT OKAY
-
- Read Tests
-
- SET IRQ TYPE TO MSI: OKAY
- READ ( 1 bytes): OKAY
- READ ( 1024 bytes): OKAY
- READ ( 1025 bytes): OKAY
- READ (1024000 bytes): OKAY
- READ (1024001 bytes): OKAY
-
- Write Tests
-
- WRITE ( 1 bytes): OKAY
- WRITE ( 1024 bytes): OKAY
- WRITE ( 1025 bytes): OKAY
- WRITE (1024000 bytes): OKAY
- WRITE (1024001 bytes): OKAY
-
- Copy Tests
-
- COPY ( 1 bytes): OKAY
- COPY ( 1024 bytes): OKAY
- COPY ( 1025 bytes): OKAY
- COPY (1024000 bytes): OKAY
- COPY (1024001 bytes): OKAY
+ # pci_endpoint_test
+ TAP version 13
+ 1..16
+ # Starting 16 tests from 9 test cases.
+ # RUN pci_ep_bar.BAR0.BAR_TEST ...
+ # OK pci_ep_bar.BAR0.BAR_TEST
+ ok 1 pci_ep_bar.BAR0.BAR_TEST
+ # RUN pci_ep_bar.BAR1.BAR_TEST ...
+ # OK pci_ep_bar.BAR1.BAR_TEST
+ ok 2 pci_ep_bar.BAR1.BAR_TEST
+ # RUN pci_ep_bar.BAR2.BAR_TEST ...
+ # OK pci_ep_bar.BAR2.BAR_TEST
+ ok 3 pci_ep_bar.BAR2.BAR_TEST
+ # RUN pci_ep_bar.BAR3.BAR_TEST ...
+ # OK pci_ep_bar.BAR3.BAR_TEST
+ ok 4 pci_ep_bar.BAR3.BAR_TEST
+ # RUN pci_ep_bar.BAR4.BAR_TEST ...
+ # OK pci_ep_bar.BAR4.BAR_TEST
+ ok 5 pci_ep_bar.BAR4.BAR_TEST
+ # RUN pci_ep_bar.BAR5.BAR_TEST ...
+ # OK pci_ep_bar.BAR5.BAR_TEST
+ ok 6 pci_ep_bar.BAR5.BAR_TEST
+ # RUN pci_ep_basic.CONSECUTIVE_BAR_TEST ...
+ # OK pci_ep_basic.CONSECUTIVE_BAR_TEST
+ ok 7 pci_ep_basic.CONSECUTIVE_BAR_TEST
+ # RUN pci_ep_basic.LEGACY_IRQ_TEST ...
+ # OK pci_ep_basic.LEGACY_IRQ_TEST
+ ok 8 pci_ep_basic.LEGACY_IRQ_TEST
+ # RUN pci_ep_basic.MSI_TEST ...
+ # OK pci_ep_basic.MSI_TEST
+ ok 9 pci_ep_basic.MSI_TEST
+ # RUN pci_ep_basic.MSIX_TEST ...
+ # OK pci_ep_basic.MSIX_TEST
+ ok 10 pci_ep_basic.MSIX_TEST
+ # RUN pci_ep_data_transfer.memcpy.READ_TEST ...
+ # OK pci_ep_data_transfer.memcpy.READ_TEST
+ ok 11 pci_ep_data_transfer.memcpy.READ_TEST
+ # RUN pci_ep_data_transfer.memcpy.WRITE_TEST ...
+ # OK pci_ep_data_transfer.memcpy.WRITE_TEST
+ ok 12 pci_ep_data_transfer.memcpy.WRITE_TEST
+ # RUN pci_ep_data_transfer.memcpy.COPY_TEST ...
+ # OK pci_ep_data_transfer.memcpy.COPY_TEST
+ ok 13 pci_ep_data_transfer.memcpy.COPY_TEST
+ # RUN pci_ep_data_transfer.dma.READ_TEST ...
+ # OK pci_ep_data_transfer.dma.READ_TEST
+ ok 14 pci_ep_data_transfer.dma.READ_TEST
+ # RUN pci_ep_data_transfer.dma.WRITE_TEST ...
+ # OK pci_ep_data_transfer.dma.WRITE_TEST
+ ok 15 pci_ep_data_transfer.dma.WRITE_TEST
+ # RUN pci_ep_data_transfer.dma.COPY_TEST ...
+ # OK pci_ep_data_transfer.dma.COPY_TEST
+ ok 16 pci_ep_data_transfer.dma.COPY_TEST
+ # PASSED: 16 / 16 tests passed.
+ # Totals: pass:16 fail:0 xfail:0 xpass:0 skip:0 error:0
+
+
+Testcase 16 (pci_ep_data_transfer.dma.COPY_TEST) will fail for most of the DMA
+capable endpoint controllers due to the absence of the MEMCPY over DMA. For such
+controllers, it is advisable to skip this testcase using this
+command::
+
+ # pci_endpoint_test -f pci_ep_bar -f pci_ep_basic -v memcpy -T COPY_TEST -v dma
+
+Kselftest EP Doorbell
+~~~~~~~~~~~~~~~~~~~~~
+
+If the Endpoint MSI controller is used for the doorbell usecase, run below
+command for testing it:
+
+ # pci_endpoint_test -f pcie_ep_doorbell
+
+ # Starting 1 tests from 1 test cases.
+ # RUN pcie_ep_doorbell.DOORBELL_TEST ...
+ # OK pcie_ep_doorbell.DOORBELL_TEST
+ ok 1 pcie_ep_doorbell.DOORBELL_TEST
+ # PASSED: 1 / 1 tests passed.
+ # Totals: pass:1 fail:0 xfail:0 xpass:0 skip:0 error:0
diff --git a/Documentation/PCI/endpoint/pci-vntb-howto.rst b/Documentation/PCI/endpoint/pci-vntb-howto.rst
index 70d3bc90893f..9a7a2f0a6849 100644
--- a/Documentation/PCI/endpoint/pci-vntb-howto.rst
+++ b/Documentation/PCI/endpoint/pci-vntb-howto.rst
@@ -90,8 +90,9 @@ of the function device and is populated with the following NTB specific
attributes that can be configured by the user::
# ls functions/pci_epf_vntb/func1/pci_epf_vntb.0/
- db_count mw1 mw2 mw3 mw4 num_mws
- spad_count
+ ctrl_bar db_count mw1_bar mw2_bar mw3_bar mw4_bar spad_count
+ db_bar mw1 mw2 mw3 mw4 num_mws vbus_number
+ vntb_vid vntb_pid
A sample configuration for NTB function is given below::
@@ -100,6 +101,10 @@ A sample configuration for NTB function is given below::
# echo 1 > functions/pci_epf_vntb/func1/pci_epf_vntb.0/num_mws
# echo 0x100000 > functions/pci_epf_vntb/func1/pci_epf_vntb.0/mw1
+By default, each construct is assigned a BAR, as needed and in order.
+Should a specific BAR setup be required by the platform, BAR may be assigned
+to each construct using the related ``XYZ_bar`` entry.
+
A sample configuration for virtual NTB driver for virtual PCI bus::
# echo 0x1957 > functions/pci_epf_vntb/func1/pci_epf_vntb.0/vntb_vid
diff --git a/Documentation/PCI/index.rst b/Documentation/PCI/index.rst
index e73f84aebde3..5d720d2a415e 100644
--- a/Documentation/PCI/index.rst
+++ b/Documentation/PCI/index.rst
@@ -17,4 +17,6 @@ PCI Bus Subsystem
pci-error-recovery
pcieaer-howto
endpoint/index
+ controller/index
boot-interrupts
+ tph
diff --git a/Documentation/PCI/msi-howto.rst b/Documentation/PCI/msi-howto.rst
index 783d30b7bb42..0692c9aec66f 100644
--- a/Documentation/PCI/msi-howto.rst
+++ b/Documentation/PCI/msi-howto.rst
@@ -103,7 +103,7 @@ min_vecs argument set to this limit, and the PCI core will return -ENOSPC
if it can't meet the minimum number of vectors.
The flags argument is used to specify which type of interrupt can be used
-by the device and the driver (PCI_IRQ_LEGACY, PCI_IRQ_MSI, PCI_IRQ_MSIX).
+by the device and the driver (PCI_IRQ_INTX, PCI_IRQ_MSI, PCI_IRQ_MSIX).
A convenient short-hand (PCI_IRQ_ALL_TYPES) is also available to ask for
any possible kind of interrupt. If the PCI_IRQ_AFFINITY flag is set,
pci_alloc_irq_vectors() will spread the interrupts around the available CPUs.
diff --git a/Documentation/PCI/pci-error-recovery.rst b/Documentation/PCI/pci-error-recovery.rst
index 42e1e78353f3..43bc4e3665b4 100644
--- a/Documentation/PCI/pci-error-recovery.rst
+++ b/Documentation/PCI/pci-error-recovery.rst
@@ -13,7 +13,7 @@ PCI Error Recovery
Many PCI bus controllers are able to detect a variety of hardware
PCI errors on the bus, such as parity errors on the data and address
buses, as well as SERR and PERR errors. Some of the more advanced
-chipsets are able to deal with these errors; these include PCI-E chipsets,
+chipsets are able to deal with these errors; these include PCIe chipsets,
and the PCI-host bridges found on IBM Power4, Power5 and Power6-based
pSeries boxes. A typical action taken is to disconnect the affected device,
halting all I/O to it. The goal of a disconnection is to avoid system
@@ -108,8 +108,8 @@ A driver does not have to implement all of these callbacks; however,
if it implements any, it must implement error_detected(). If a callback
is not implemented, the corresponding feature is considered unsupported.
For example, if mmio_enabled() and resume() aren't there, then it
-is assumed that the driver is not doing any direct recovery and requires
-a slot reset. Typically a driver will want to know about
+is assumed that the driver does not need these callbacks
+for recovery. Typically a driver will want to know about
a slot_reset().
The actual steps taken by a platform to recover from a PCI error
@@ -122,6 +122,10 @@ A PCI bus error is detected by the PCI hardware. On powerpc, the slot
is isolated, in that all I/O is blocked: all reads return 0xffffffff,
all writes are ignored.
+Similarly, on platforms supporting Downstream Port Containment
+(PCIe r7.0 sec 6.2.11), the link to the sub-hierarchy with the
+faulting device is disabled. Any device in the sub-hierarchy
+becomes inaccessible.
STEP 1: Notification
--------------------
@@ -141,6 +145,9 @@ shouldn't do any new IOs. Called in task context. This is sort of a
All drivers participating in this system must implement this call.
The driver must return one of the following result codes:
+ - PCI_ERS_RESULT_RECOVERED
+ Driver returns this if it thinks the device is usable despite
+ the error and does not need further intervention.
- PCI_ERS_RESULT_CAN_RECOVER
Driver returns this if it thinks it might be able to recover
the HW by just banging IOs or if it wants to be given
@@ -199,7 +206,25 @@ reset or some such, but not restart operations. This callback is made if
all drivers on a segment agree that they can try to recover and if no automatic
link reset was performed by the HW. If the platform can't just re-enable IOs
without a slot reset or a link reset, it will not call this callback, and
-instead will have gone directly to STEP 3 (Link Reset) or STEP 4 (Slot Reset)
+instead will have gone directly to STEP 3 (Link Reset) or STEP 4 (Slot Reset).
+
+.. note::
+
+ On platforms supporting Advanced Error Reporting (PCIe r7.0 sec 6.2),
+ the faulting device may already be accessible in STEP 1 (Notification).
+ Drivers should nevertheless defer accesses to STEP 2 (MMIO Enabled)
+ to be compatible with EEH on powerpc and with s390 (where devices are
+ inaccessible until STEP 2).
+
+ On platforms supporting Downstream Port Containment, the link to the
+ sub-hierarchy with the faulting device is re-enabled in STEP 3 (Link
+ Reset). Hence devices in the sub-hierarchy are inaccessible until
+ STEP 4 (Slot Reset).
+
+ For errors such as Surprise Down (PCIe r7.0 sec 6.2.7), the device
+ may not even be accessible in STEP 4 (Slot Reset). Drivers can detect
+ accessibility by checking whether reads from the device return all 1's
+ (PCI_POSSIBLE_ERROR()).
.. note::
@@ -234,14 +259,14 @@ The driver should return one of the following result codes:
The next step taken depends on the results returned by the drivers.
If all drivers returned PCI_ERS_RESULT_RECOVERED, then the platform
-proceeds to either STEP3 (Link Reset) or to STEP 5 (Resume Operations).
+proceeds to either STEP 3 (Link Reset) or to STEP 5 (Resume Operations).
If any driver returned PCI_ERS_RESULT_NEED_RESET, then the platform
proceeds to STEP 4 (Slot Reset)
STEP 3: Link Reset
------------------
-The platform resets the link. This is a PCI-Express specific step
+The platform resets the link. This is a PCIe specific step
and is done whenever a fatal error has been detected that can be
"solved" by resetting the link.
@@ -263,13 +288,13 @@ that is equivalent to what it would be after a fresh system
power-on followed by power-on BIOS/system firmware initialization.
Soft reset is also known as hot-reset.
-Powerpc fundamental reset is supported by PCI Express cards only
+Powerpc fundamental reset is supported by PCIe cards only
and results in device's state machines, hardware logic, port states and
configuration registers to initialize to their default conditions.
For most PCI devices, a soft reset will be sufficient for recovery.
Optional fundamental reset is provided to support a limited number
-of PCI Express devices for which a soft reset is not sufficient
+of PCIe devices for which a soft reset is not sufficient
for recovery.
If the platform supports PCI hotplug, then the reset might be
@@ -301,6 +326,21 @@ be recovered, there is nothing more that can be done; the platform
will typically report a "permanent failure" in such a case. The
device will be considered "dead" in this case.
+Drivers typically need to call pci_restore_state() after reset to
+re-initialize the device's config space registers and thereby
+bring it from D0\ :sub:`uninitialized` into D0\ :sub:`active` state
+(PCIe r7.0 sec 5.3.1.1). The PCI core invokes pci_save_state()
+on enumeration after initializing config space to ensure that a
+saved state is available for subsequent error recovery.
+Drivers which modify config space on probe may need to invoke
+pci_save_state() afterwards to record those changes for later
+error recovery. When going into system suspend, pci_save_state()
+is called for every PCI device and that state will be restored
+not only on resume, but also on any subsequent error recovery.
+In the unlikely event that the saved state recorded on suspend
+is unsuitable for error recovery, drivers should call
+pci_save_state() on resume.
+
Drivers for multi-function cards will need to coordinate among
themselves as to which driver instance will perform any "one-shot"
or global device initialization. For example, the Symbios sym53cxx2
@@ -313,7 +353,7 @@ Result codes:
- PCI_ERS_RESULT_DISCONNECT
Same as above.
-Drivers for PCI Express cards that require a fundamental reset must
+Drivers for PCIe cards that require a fundamental reset must
set the needs_freset bit in the pci_dev structure in their probe function.
For example, the QLogic qla2xxx driver sets the needs_freset bit for certain
PCI card types::
diff --git a/Documentation/PCI/pci.rst b/Documentation/PCI/pci.rst
index cced568d78e9..f4d2662871ab 100644
--- a/Documentation/PCI/pci.rst
+++ b/Documentation/PCI/pci.rst
@@ -52,7 +52,7 @@ driver generally needs to perform the following initialization:
- Enable DMA/processing engines
When done using the device, and perhaps the module needs to be unloaded,
-the driver needs to take the follow steps:
+the driver needs to take the following steps:
- Disable the device from generating IRQs
- Release the IRQ (free_irq())
@@ -335,7 +335,7 @@ causes the PCI support to program CPU vector data into the PCI device
capability registers. Many architectures, chip-sets, or BIOSes do NOT
support MSI or MSI-X and a call to pci_alloc_irq_vectors with just
the PCI_IRQ_MSI and PCI_IRQ_MSIX flags will fail, so try to always
-specify PCI_IRQ_LEGACY as well.
+specify PCI_IRQ_INTX as well.
Drivers that have different interrupt handlers for MSI/MSI-X and
legacy INTx should chose the right one based on the msi_enabled
diff --git a/Documentation/PCI/pcieaer-howto.rst b/Documentation/PCI/pcieaer-howto.rst
index e00d63971695..3210c4792978 100644
--- a/Documentation/PCI/pcieaer-howto.rst
+++ b/Documentation/PCI/pcieaer-howto.rst
@@ -70,27 +70,42 @@ AER error output
----------------
When a PCIe AER error is captured, an error message will be output to
-console. If it's a correctable error, it is output as an info message.
+console. If it's a correctable error, it is output as a warning message.
Otherwise, it is printed as an error. So users could choose different
log level to filter out correctable error messages.
Below shows an example::
- 0000:50:00.0: PCIe Bus Error: severity=Uncorrected (Fatal), type=Transaction Layer, id=0500(Requester ID)
+ 0000:50:00.0: PCIe Bus Error: severity=Uncorrectable (Fatal), type=Transaction Layer, (Requester ID)
0000:50:00.0: device [8086:0329] error status/mask=00100000/00000000
- 0000:50:00.0: [20] Unsupported Request (First)
- 0000:50:00.0: TLP Header: 04000001 00200a03 05010000 00050100
+ 0000:50:00.0: [20] UnsupReq (First)
+ 0000:50:00.0: TLP Header: 0x04000001 0x00200a03 0x05010000 0x00050100
In the example, 'Requester ID' means the ID of the device that sent
the error message to the Root Port. Please refer to PCIe specs for other
fields.
+AER Ratelimits
+--------------
+
+Since error messages can be generated for each transaction, we may see
+large volumes of errors reported. To prevent spammy devices from flooding
+the console/stalling execution, messages are throttled by device and error
+type (correctable vs. non-fatal uncorrectable). Fatal errors, including
+DPC errors, are not ratelimited.
+
+AER uses the default ratelimit of DEFAULT_RATELIMIT_BURST (10 events) over
+DEFAULT_RATELIMIT_INTERVAL (5 seconds).
+
+Ratelimits are exposed in the form of sysfs attributes and configurable.
+See Documentation/ABI/testing/sysfs-bus-pci-devices-aer.
+
AER Statistics / Counters
-------------------------
When PCIe AER errors are captured, the counters / statistics are also exposed
in the form of sysfs attributes which are documented at
-Documentation/ABI/testing/sysfs-bus-pci-devices-aer_stats
+Documentation/ABI/testing/sysfs-bus-pci-devices-aer.
Developer Guide
===============
@@ -123,7 +138,7 @@ error message to the Root Port above it when it captures
an error. The Root Port, upon receiving an error reporting message,
internally processes and logs the error message in its AER
Capability structure. Error information being logged includes storing
-the error reporting agent's requestor ID into the Error Source
+the error reporting agent's Requester ID into the Error Source
Identification Registers and setting the error bits of the Root Error
Status Register accordingly. If AER error reporting is enabled in the Root
Error Command Register, the Root Port generates an interrupt when an
@@ -137,18 +152,6 @@ the device driver.
Provide callbacks
-----------------
-callback reset_link to reset PCIe link
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-This callback is used to reset the PCIe physical link when a
-fatal error happens. The Root Port AER service driver provides a
-default reset_link function, but different Upstream Ports might
-have different specifications to reset the PCIe link, so
-Upstream Port drivers may provide their own reset_link functions.
-
-Section 3.2.2.2 provides more detailed info on when to call
-reset_link.
-
PCI error-recovery callbacks
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -159,8 +162,8 @@ when performing error recovery actions.
Data struct pci_driver has a pointer, err_handler, to point to
pci_error_handlers who consists of a couple of callback function
pointers. The AER driver follows the rules defined in
-pci-error-recovery.rst except PCIe-specific parts (e.g.
-reset_link). Please refer to pci-error-recovery.rst for detailed
+pci-error-recovery.rst except PCIe-specific parts (see
+below). Please refer to pci-error-recovery.rst for detailed
definitions of the callbacks.
The sections below specify when to call the error callback functions.
@@ -174,10 +177,21 @@ software intervention or any loss of data. These errors do not
require any recovery actions. The AER driver clears the device's
correctable error status register accordingly and logs these errors.
-Non-correctable (non-fatal and fatal) errors
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Uncorrectable (non-fatal and fatal) errors
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The AER driver performs a Secondary Bus Reset to recover from
+uncorrectable errors. The reset is applied at the port above
+the originating device: If the originating device is an Endpoint,
+only the Endpoint is reset. If on the other hand the originating
+device has subordinate devices, those are all affected by the
+reset as well.
-If an error message indicates a non-fatal error, performing link reset
+If the originating device is a Root Complex Integrated Endpoint,
+there's no port above where a Secondary Bus Reset could be applied.
+In this case, the AER driver instead applies a Function Level Reset.
+
+If an error message indicates a non-fatal error, performing a reset
at upstream is not required. The AER driver calls error_detected(dev,
pci_channel_io_normal) to all drivers associated within a hierarchy in
question. For example::
@@ -189,38 +203,34 @@ Downstream Port B and Endpoint.
A driver may return PCI_ERS_RESULT_CAN_RECOVER,
PCI_ERS_RESULT_DISCONNECT, or PCI_ERS_RESULT_NEED_RESET, depending on
-whether it can recover or the AER driver calls mmio_enabled as next.
+whether it can recover without a reset, considers the device unrecoverable
+or needs a reset for recovery. If all affected drivers agree that they can
+recover without a reset, it is skipped. Should one driver request a reset,
+it overrides all other drivers.
If an error message indicates a fatal error, kernel will broadcast
error_detected(dev, pci_channel_io_frozen) to all drivers within
-a hierarchy in question. Then, performing link reset at upstream is
-necessary. As different kinds of devices might use different approaches
-to reset link, AER port service driver is required to provide the
-function to reset link via callback parameter of pcie_do_recovery()
-function. If reset_link is not NULL, recovery function will use it
-to reset the link. If error_detected returns PCI_ERS_RESULT_CAN_RECOVER
-and reset_link returns PCI_ERS_RESULT_RECOVERED, the error handling goes
-to mmio_enabled.
-
-Frequent Asked Questions
-------------------------
+a hierarchy in question. Then, performing a reset at upstream is
+necessary. If error_detected returns PCI_ERS_RESULT_CAN_RECOVER
+to indicate that recovery without a reset is possible, the error
+handling goes to mmio_enabled, but afterwards a reset is still
+performed.
-Q:
- What happens if a PCIe device driver does not provide an
- error recovery handler (pci_driver->err_handler is equal to NULL)?
+In other words, for non-fatal errors, drivers may opt in to a reset.
+But for fatal errors, they cannot opt out of a reset, based on the
+assumption that the link is unreliable.
-A:
- The devices attached with the driver won't be recovered. If the
- error is fatal, kernel will print out warning messages. Please refer
- to section 3 for more information.
+Frequently Asked Questions
+--------------------------
Q:
- What happens if an upstream port service driver does not provide
- callback reset_link?
+ What happens if a PCIe device driver does not provide an
+ error recovery handler (pci_driver->err_handler is equal to NULL)?
A:
- Fatal error recovery will fail if the errors are reported by the
- upstream ports who are attached by the service driver.
+ The devices attached with the driver won't be recovered.
+ The kernel will print out informational messages to identify
+ unrecoverable devices.
Software error injection
@@ -241,7 +251,7 @@ After reboot with new kernel or insert the module, a device file named
Then, you need a user space tool named aer-inject, which can be gotten
from:
- https://git.kernel.org/cgit/linux/kernel/git/gong.chen/aer-inject.git/
+ https://github.com/intel/aer-inject.git
More information about aer-inject can be found in the document in
its source code.
diff --git a/Documentation/PCI/pciebus-howto.rst b/Documentation/PCI/pciebus-howto.rst
index a0027e8fb0d0..375d9ce171f6 100644
--- a/Documentation/PCI/pciebus-howto.rst
+++ b/Documentation/PCI/pciebus-howto.rst
@@ -139,7 +139,7 @@ driver data structure.
static struct pcie_port_service_driver root_aerdrv = {
.name = (char *)device_name,
- .id_table = &service_id[0],
+ .id_table = service_id,
.probe = aerdrv_load,
.remove = aerdrv_unload,
@@ -217,8 +217,12 @@ capability structure except the PCI Express capability structure,
that is shared between many drivers including the service drivers.
RMW Capability accessors (pcie_capability_clear_and_set_word(),
pcie_capability_set_word(), and pcie_capability_clear_word()) protect
-a selected set of PCI Express Capability Registers (Link Control
-Register and Root Control Register). Any change to those registers
-should be performed using RMW accessors to avoid problems due to
-concurrent updates. For the up-to-date list of protected registers,
-see pcie_capability_clear_and_set_word().
+a selected set of PCI Express Capability Registers:
+
+* Link Control Register
+* Root Control Register
+* Link Control 2 Register
+
+Any change to those registers should be performed using RMW accessors to
+avoid problems due to concurrent updates. For the up-to-date list of
+protected registers, see pcie_capability_clear_and_set_word().
diff --git a/Documentation/PCI/tph.rst b/Documentation/PCI/tph.rst
new file mode 100644
index 000000000000..e8993be64fd6
--- /dev/null
+++ b/Documentation/PCI/tph.rst
@@ -0,0 +1,132 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+
+===========
+TPH Support
+===========
+
+:Copyright: 2024 Advanced Micro Devices, Inc.
+:Authors: - Eric van Tassell <eric.vantassell@amd.com>
+ - Wei Huang <wei.huang2@amd.com>
+
+
+Overview
+========
+
+TPH (TLP Processing Hints) is a PCIe feature that allows endpoint devices
+to provide optimization hints for requests that target memory space.
+These hints, in a format called Steering Tags (STs), are embedded in the
+requester's TLP headers, enabling the system hardware, such as the Root
+Complex, to better manage platform resources for these requests.
+
+For example, on platforms with TPH-based direct data cache injection
+support, an endpoint device can include appropriate STs in its DMA
+traffic to specify which cache the data should be written to. This allows
+the CPU core to have a higher probability of getting data from cache,
+potentially improving performance and reducing latency in data
+processing.
+
+
+How to Use TPH
+==============
+
+TPH is presented as an optional extended capability in PCIe. The Linux
+kernel handles TPH discovery during boot, but it is up to the device
+driver to request TPH enablement if it is to be utilized. Once enabled,
+the driver uses the provided API to obtain the Steering Tag for the
+target memory and to program the ST into the device's ST table.
+
+Enable TPH support in Linux
+---------------------------
+
+To support TPH, the kernel must be built with the CONFIG_PCIE_TPH option
+enabled.
+
+Manage TPH
+----------
+
+To enable TPH for a device, use the following function::
+
+ int pcie_enable_tph(struct pci_dev *pdev, int mode);
+
+This function enables TPH support for device with a specific ST mode.
+Current supported modes include:
+
+ * PCI_TPH_ST_NS_MODE - NO ST Mode
+ * PCI_TPH_ST_IV_MODE - Interrupt Vector Mode
+ * PCI_TPH_ST_DS_MODE - Device Specific Mode
+
+`pcie_enable_tph()` checks whether the requested mode is actually
+supported by the device before enabling. The device driver can figure out
+which TPH mode is supported and can be properly enabled based on the
+return value of `pcie_enable_tph()`.
+
+To disable TPH, use the following function::
+
+ void pcie_disable_tph(struct pci_dev *pdev);
+
+Manage ST
+---------
+
+Steering Tags are platform specific. PCIe spec does not specify where STs
+are from. Instead PCI Firmware Specification defines an ACPI _DSM method
+(see the `Revised _DSM for Cache Locality TPH Features ECN
+<https://members.pcisig.com/wg/PCI-SIG/document/15470>`_) for retrieving
+STs for a target memory of various properties. This method is what is
+supported in this implementation.
+
+To retrieve a Steering Tag for a target memory associated with a specific
+CPU, use the following function::
+
+ int pcie_tph_get_cpu_st(struct pci_dev *pdev, enum tph_mem_type type,
+ unsigned int cpu_uid, u16 *tag);
+
+The `type` argument is used to specify the memory type, either volatile
+or persistent, of the target memory. The `cpu_uid` argument specifies the
+CPU where the memory is associated to.
+
+After the ST value is retrieved, the device driver can use the following
+function to write the ST into the device::
+
+ int pcie_tph_set_st_entry(struct pci_dev *pdev, unsigned int index,
+ u16 tag);
+
+The `index` argument is the ST table entry index the ST tag will be
+written into. `pcie_tph_set_st_entry()` will figure out the proper
+location of ST table, either in the MSI-X table or in the TPH Extended
+Capability space, and write the Steering Tag into the ST entry pointed by
+the `index` argument.
+
+It is completely up to the driver to decide how to use these TPH
+functions. For example a network device driver can use the TPH APIs above
+to update the Steering Tag when interrupt affinity of a RX/TX queue has
+been changed. Here is a sample code for IRQ affinity notifier:
+
+.. code-block:: c
+
+ static void irq_affinity_notified(struct irq_affinity_notify *notify,
+ const cpumask_t *mask)
+ {
+ struct drv_irq *irq;
+ unsigned int cpu_id;
+ u16 tag;
+
+ irq = container_of(notify, struct drv_irq, affinity_notify);
+ cpumask_copy(irq->cpu_mask, mask);
+
+ /* Pick a right CPU as the target - here is just an example */
+ cpu_id = cpumask_first(irq->cpu_mask);
+
+ if (pcie_tph_get_cpu_st(irq->pdev, TPH_MEM_TYPE_VM, cpu_id,
+ &tag))
+ return;
+
+ if (pcie_tph_set_st_entry(irq->pdev, irq->msix_nr, tag))
+ return;
+ }
+
+Disable TPH system-wide
+-----------------------
+
+There is a kernel command line option available to control TPH feature:
+ * "notph": TPH will be disabled for all endpoint devices.
diff --git a/Documentation/RAS/ras.rst b/Documentation/RAS/ras.rst
deleted file mode 100644
index 2556b397cd27..000000000000
--- a/Documentation/RAS/ras.rst
+++ /dev/null
@@ -1,26 +0,0 @@
-.. SPDX-License-Identifier: GPL-2.0
-
-Reliability, Availability and Serviceability features
-=====================================================
-
-This documents different aspects of the RAS functionality present in the
-kernel.
-
-Error decoding
----------------
-
-* x86
-
-Error decoding on AMD systems should be done using the rasdaemon tool:
-https://github.com/mchehab/rasdaemon/
-
-While the daemon is running, it would automatically log and decode
-errors. If not, one can still decode such errors by supplying the
-hardware information from the error::
-
- $ rasdaemon -p --status <STATUS> --ipid <IPID> --smca
-
-Also, the user can pass particular family and model to decode the error
-string::
-
- $ rasdaemon -p --status <STATUS> --ipid <IPID> --smca --family <CPU Family> --model <CPU Model> --bank <BANK_NUM>
diff --git a/Documentation/RCU/Design/Data-Structures/Data-Structures.rst b/Documentation/RCU/Design/Data-Structures/Data-Structures.rst
index b34990c7c377..1b0aad184dd7 100644
--- a/Documentation/RCU/Design/Data-Structures/Data-Structures.rst
+++ b/Documentation/RCU/Design/Data-Structures/Data-Structures.rst
@@ -286,6 +286,39 @@ in order to detect the beginnings and ends of grace periods in a
distributed fashion. The values flow from ``rcu_state`` to ``rcu_node``
(down the tree from the root to the leaves) to ``rcu_data``.
++-----------------------------------------------------------------------+
+| **Quick Quiz**: |
++-----------------------------------------------------------------------+
+| Given that the root rcu_node structure has a gp_seq field, |
+| why does RCU maintain a separate gp_seq in the rcu_state structure? |
+| Why not just use the root rcu_node's gp_seq as the official record |
+| and update it directly when starting a new grace period? |
++-----------------------------------------------------------------------+
+| **Answer**: |
++-----------------------------------------------------------------------+
+| On single-node RCU trees (where the root node is also a leaf), |
+| updating the root node's gp_seq immediately would create unnecessary |
+| lock contention. Here's why: |
+| |
+| If we did rcu_seq_start() directly on the root node's gp_seq: |
+| |
+| 1. All CPUs would immediately see their node's gp_seq from their rdp's|
+| gp_seq, in rcu_pending(). They would all then invoke the RCU-core. |
+| 2. Which calls note_gp_changes() and try to acquire the node lock. |
+| 3. But rnp->qsmask isn't initialized yet (happens later in |
+| rcu_gp_init()) |
+| 4. So each CPU would acquire the lock, find it can't determine if it |
+| needs to report quiescent state (no qsmask), update rdp->gp_seq, |
+| and release the lock. |
+| 5. Result: Lots of lock acquisitions with no grace period progress |
+| |
+| By having a separate rcu_state.gp_seq, we can increment the official |
+| grace period counter without immediately affecting what CPUs see in |
+| their nodes. The hierarchical propagation in rcu_gp_init() then |
+| updates the root node's gp_seq and qsmask together under the same lock|
+| acquisition, avoiding this useless contention. |
++-----------------------------------------------------------------------+
+
Miscellaneous
'''''''''''''
@@ -921,10 +954,10 @@ This portion of the ``rcu_data`` structure is declared as follows:
::
- 1 int dynticks_snap;
+ 1 int watching_snap;
2 unsigned long dynticks_fqs;
-The ``->dynticks_snap`` field is used to take a snapshot of the
+The ``->watching_snap`` field is used to take a snapshot of the
corresponding CPU's dyntick-idle state when forcing quiescent states,
and is therefore accessed from other CPUs. Finally, the
``->dynticks_fqs`` field is used to count the number of times this CPU
@@ -935,8 +968,8 @@ This portion of the rcu_data structure is declared as follows:
::
- 1 long dynticks_nesting;
- 2 long dynticks_nmi_nesting;
+ 1 long nesting;
+ 2 long nmi_nesting;
3 atomic_t dynticks;
4 bool rcu_need_heavy_qs;
5 bool rcu_urgent_qs;
@@ -945,14 +978,14 @@ These fields in the rcu_data structure maintain the per-CPU dyntick-idle
state for the corresponding CPU. The fields may be accessed only from
the corresponding CPU (and from tracing) unless otherwise stated.
-The ``->dynticks_nesting`` field counts the nesting depth of process
+The ``->nesting`` field counts the nesting depth of process
execution, so that in normal circumstances this counter has value zero
or one. NMIs, irqs, and tracers are counted by the
-``->dynticks_nmi_nesting`` field. Because NMIs cannot be masked, changes
+``->nmi_nesting`` field. Because NMIs cannot be masked, changes
to this variable have to be undertaken carefully using an algorithm
provided by Andy Lutomirski. The initial transition from idle adds one,
and nested transitions add two, so that a nesting level of five is
-represented by a ``->dynticks_nmi_nesting`` value of nine. This counter
+represented by a ``->nmi_nesting`` value of nine. This counter
can therefore be thought of as counting the number of reasons why this
CPU cannot be permitted to enter dyntick-idle mode, aside from
process-level transitions.
@@ -960,12 +993,12 @@ process-level transitions.
However, it turns out that when running in non-idle kernel context, the
Linux kernel is fully capable of entering interrupt handlers that never
exit and perhaps also vice versa. Therefore, whenever the
-``->dynticks_nesting`` field is incremented up from zero, the
-``->dynticks_nmi_nesting`` field is set to a large positive number, and
-whenever the ``->dynticks_nesting`` field is decremented down to zero,
-the ``->dynticks_nmi_nesting`` field is set to zero. Assuming that
+``->nesting`` field is incremented up from zero, the
+``->nmi_nesting`` field is set to a large positive number, and
+whenever the ``->nesting`` field is decremented down to zero,
+the ``->nmi_nesting`` field is set to zero. Assuming that
the number of misnested interrupts is not sufficient to overflow the
-counter, this approach corrects the ``->dynticks_nmi_nesting`` field
+counter, this approach corrects the ``->nmi_nesting`` field
every time the corresponding CPU enters the idle loop from process
context.
@@ -992,8 +1025,8 @@ code.
+-----------------------------------------------------------------------+
| **Quick Quiz**: |
+-----------------------------------------------------------------------+
-| Why not simply combine the ``->dynticks_nesting`` and |
-| ``->dynticks_nmi_nesting`` counters into a single counter that just |
+| Why not simply combine the ``->nesting`` and |
+| ``->nmi_nesting`` counters into a single counter that just |
| counts the number of reasons that the corresponding CPU is non-idle? |
+-----------------------------------------------------------------------+
| **Answer**: |
diff --git a/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst b/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst
index 5750f125361b..1a5ff1a9f02e 100644
--- a/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst
+++ b/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst
@@ -147,11 +147,11 @@ RCU read-side critical sections preceding and following the current
idle sojourn.
This case is handled by calls to the strongly ordered
``atomic_add_return()`` read-modify-write atomic operation that
-is invoked within ``rcu_dynticks_eqs_enter()`` at idle-entry
-time and within ``rcu_dynticks_eqs_exit()`` at idle-exit time.
-The grace-period kthread invokes ``rcu_dynticks_snap()`` and
-``rcu_dynticks_in_eqs_since()`` (both of which invoke
-an ``atomic_add_return()`` of zero) to detect idle CPUs.
+is invoked within ``ct_kernel_exit_state()`` at idle-entry
+time and within ``ct_kernel_enter_state()`` at idle-exit time.
+The grace-period kthread invokes first ``ct_rcu_watching_cpu_acquire()``
+(preceded by a full memory barrier) and ``rcu_watching_snap_stopped_since()``
+(both of which rely on acquire semantics) to detect idle CPUs.
+-----------------------------------------------------------------------+
| **Quick Quiz**: |
diff --git a/Documentation/RCU/Design/Memory-Ordering/TreeRCU-dyntick.svg b/Documentation/RCU/Design/Memory-Ordering/TreeRCU-dyntick.svg
index 423df00c4df9..3fbc19c48a58 100644
--- a/Documentation/RCU/Design/Memory-Ordering/TreeRCU-dyntick.svg
+++ b/Documentation/RCU/Design/Memory-Ordering/TreeRCU-dyntick.svg
@@ -528,7 +528,7 @@
font-style="normal"
y="-8652.5312"
x="2466.7822"
- xml:space="preserve">dyntick_save_progress_counter()</text>
+ xml:space="preserve">rcu_watching_snap_save()</text>
<text
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier"
id="text202-7-2-7-2-0"
@@ -537,7 +537,7 @@
font-style="normal"
y="-8368.1475"
x="2463.3262"
- xml:space="preserve">rcu_implicit_dynticks_qs()</text>
+ xml:space="preserve">rcu_watching_snap_recheck()</text>
</g>
<g
id="g4504"
@@ -607,7 +607,7 @@
font-weight="bold"
font-size="192"
id="text202-7-5-3-27-6"
- style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_dynticks_eqs_enter()</text>
+ style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">ct_kernel_exit_state()</text>
<text
xml:space="preserve"
x="3745.7725"
@@ -638,7 +638,7 @@
font-weight="bold"
font-size="192"
id="text202-7-5-3-27-6-1"
- style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_dynticks_eqs_exit()</text>
+ style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">ct_kernel_enter_state()</text>
<text
xml:space="preserve"
x="3745.7725"
diff --git a/Documentation/RCU/Design/Memory-Ordering/TreeRCU-gp-fqs.svg b/Documentation/RCU/Design/Memory-Ordering/TreeRCU-gp-fqs.svg
index d82a77d03d8c..25c7acc8a4c2 100644
--- a/Documentation/RCU/Design/Memory-Ordering/TreeRCU-gp-fqs.svg
+++ b/Documentation/RCU/Design/Memory-Ordering/TreeRCU-gp-fqs.svg
@@ -844,7 +844,7 @@
font-style="normal"
y="1547.8876"
x="4417.6396"
- xml:space="preserve">dyntick_save_progress_counter()</text>
+ xml:space="preserve">rcu_watching_snap_save()</text>
<g
style="fill:none;stroke-width:0.025in"
transform="translate(6501.9719,-10685.904)"
@@ -899,7 +899,7 @@
font-style="normal"
y="1858.8729"
x="4414.1836"
- xml:space="preserve">rcu_implicit_dynticks_qs()</text>
+ xml:space="preserve">rcu_watching_snap_recheck()</text>
<text
xml:space="preserve"
x="14659.87"
@@ -977,7 +977,7 @@
font-weight="bold"
font-size="192"
id="text202-7-5-3-27-6"
- style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_dynticks_eqs_enter()</text>
+ style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">ct_kernel_exit_state()</text>
<text
xml:space="preserve"
x="3745.7725"
@@ -1008,7 +1008,7 @@
font-weight="bold"
font-size="192"
id="text202-7-5-3-27-6-1"
- style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_dynticks_eqs_exit()</text>
+ style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">ct_kernel_enter_state()</text>
<text
xml:space="preserve"
x="3745.7725"
diff --git a/Documentation/RCU/Design/Memory-Ordering/TreeRCU-gp.svg b/Documentation/RCU/Design/Memory-Ordering/TreeRCU-gp.svg
index 53e0dc2a2c79..d05bc7b27edb 100644
--- a/Documentation/RCU/Design/Memory-Ordering/TreeRCU-gp.svg
+++ b/Documentation/RCU/Design/Memory-Ordering/TreeRCU-gp.svg
@@ -2974,7 +2974,7 @@
font-style="normal"
y="38114.047"
x="-334.33856"
- xml:space="preserve">dyntick_save_progress_counter()</text>
+ xml:space="preserve">rcu_watching_snap_save()</text>
<g
style="fill:none;stroke-width:0.025in"
transform="translate(1749.9916,25880.249)"
@@ -3029,7 +3029,7 @@
font-style="normal"
y="38425.035"
x="-337.79462"
- xml:space="preserve">rcu_implicit_dynticks_qs()</text>
+ xml:space="preserve">rcu_watching_snap_recheck()</text>
<text
xml:space="preserve"
x="9907.8887"
@@ -3107,7 +3107,7 @@
font-weight="bold"
font-size="192"
id="text202-7-5-3-27-6"
- style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_dynticks_eqs_enter()</text>
+ style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">ct_kernel_exit_state()</text>
<text
xml:space="preserve"
x="3745.7725"
@@ -3138,7 +3138,7 @@
font-weight="bold"
font-size="192"
id="text202-7-5-3-27-6-1"
- style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_dynticks_eqs_exit()</text>
+ style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">ct_kernel_enter_state()</text>
<text
xml:space="preserve"
x="3745.7725"
diff --git a/Documentation/RCU/Design/Memory-Ordering/TreeRCU-hotplug.svg b/Documentation/RCU/Design/Memory-Ordering/TreeRCU-hotplug.svg
index 4fa7506082bf..a92356ce4011 100644
--- a/Documentation/RCU/Design/Memory-Ordering/TreeRCU-hotplug.svg
+++ b/Documentation/RCU/Design/Memory-Ordering/TreeRCU-hotplug.svg
@@ -516,7 +516,7 @@
font-style="normal"
y="-8652.5312"
x="2466.7822"
- xml:space="preserve">dyntick_save_progress_counter()</text>
+ xml:space="preserve">rcu_watching_snap_save()</text>
<text
style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier"
id="text202-7-2-7-2-0"
@@ -525,7 +525,7 @@
font-style="normal"
y="-8368.1475"
x="2463.3262"
- xml:space="preserve">rcu_implicit_dynticks_qs()</text>
+ xml:space="preserve">rcu_watching_snap_recheck()</text>
<text
sodipodi:linespacing="125%"
style="font-size:192px;font-style:normal;font-weight:bold;line-height:125%;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier"
diff --git a/Documentation/RCU/Design/Requirements/Requirements.rst b/Documentation/RCU/Design/Requirements/Requirements.rst
index cccafdaa1f84..ba417a08b93d 100644
--- a/Documentation/RCU/Design/Requirements/Requirements.rst
+++ b/Documentation/RCU/Design/Requirements/Requirements.rst
@@ -1970,6 +1970,130 @@ corresponding CPU's leaf node lock is held. This avoids race conditions
between RCU's hotplug notifier hooks, the grace period initialization
code, and the FQS loop, all of which refer to or modify this bookkeeping.
+Note that grace period initialization (rcu_gp_init()) must carefully sequence
+CPU hotplug scanning with grace period state changes. For example, the
+following race could occur in rcu_gp_init() if rcu_seq_start() were to happen
+after the CPU hotplug scanning::
+
+ CPU0 (rcu_gp_init) CPU1 CPU2
+ --------------------- ---- ----
+ // Hotplug scan first (WRONG ORDER)
+ rcu_for_each_leaf_node(rnp) {
+ rnp->qsmaskinit = rnp->qsmaskinitnext;
+ }
+ rcutree_report_cpu_starting()
+ rnp->qsmaskinitnext |= mask;
+ rcu_read_lock()
+ r0 = *X;
+ r1 = *X;
+ X = NULL;
+ cookie = get_state_synchronize_rcu();
+ // cookie = 8 (future GP)
+ rcu_seq_start(&rcu_state.gp_seq);
+ // gp_seq = 5
+
+ // CPU1 now invisible to this GP!
+ rcu_for_each_node_breadth_first() {
+ rnp->qsmask = rnp->qsmaskinit;
+ // CPU1 not included!
+ }
+
+ // GP completes without CPU1
+ rcu_seq_end(&rcu_state.gp_seq);
+ // gp_seq = 8
+ poll_state_synchronize_rcu(cookie);
+ // Returns true!
+ kfree(r1);
+ r2 = *r0; // USE-AFTER-FREE!
+
+By incrementing ``gp_seq`` first, CPU1's RCU read-side critical section
+is guaranteed to not be missed by CPU2.
+
+Concurrent Quiescent State Reporting for Offline CPUs
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+RCU must ensure that CPUs going offline report quiescent states to avoid
+blocking grace periods. This requires careful synchronization to handle
+race conditions
+
+Race condition causing Offline CPU to hang GP
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+A race between CPU offlining and new GP initialization (gp_init()) may occur
+because rcu_report_qs_rnp() in rcutree_report_cpu_dead() must temporarily
+release the ``rcu_node`` lock to wake the RCU grace-period kthread::
+
+ CPU1 (going offline) CPU0 (GP kthread)
+ -------------------- -----------------
+ rcutree_report_cpu_dead()
+ rcu_report_qs_rnp()
+ // Must release rnp->lock to wake GP kthread
+ raw_spin_unlock_irqrestore_rcu_node()
+ // Wakes up and starts new GP
+ rcu_gp_init()
+ // First loop:
+ copies qsmaskinitnext->qsmaskinit
+ // CPU1 still in qsmaskinitnext!
+
+ // Second loop:
+ rnp->qsmask = rnp->qsmaskinit
+ mask = rnp->qsmask & ~rnp->qsmaskinitnext
+ // mask is 0! CPU1 still in both masks
+ // Reacquire lock (but too late)
+ rnp->qsmaskinitnext &= ~mask // Finally clears bit
+
+Without ``ofl_lock``, the new grace period includes the offline CPU and waits
+forever for its quiescent state causing a GP hang.
+
+A solution with ofl_lock
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+The ``ofl_lock`` (offline lock) prevents rcu_gp_init() from running during
+the vulnerable window when rcu_report_qs_rnp() has released ``rnp->lock``::
+
+ CPU0 (rcu_gp_init) CPU1 (rcutree_report_cpu_dead)
+ ------------------ ------------------------------
+ rcu_for_each_leaf_node(rnp) {
+ arch_spin_lock(&ofl_lock) -----> arch_spin_lock(&ofl_lock) [BLOCKED]
+
+ // Safe: CPU1 can't interfere
+ rnp->qsmaskinit = rnp->qsmaskinitnext
+
+ arch_spin_unlock(&ofl_lock) ---> // Now CPU1 can proceed
+ } // But snapshot already taken
+
+Another race causing GP hangs in rcu_gpu_init(): Reporting QS for Now-offline CPUs
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+After the first loop takes an atomic snapshot of online CPUs, as shown above,
+the second loop in rcu_gp_init() detects CPUs that went offline between
+releasing ``ofl_lock`` and acquiring the per-node ``rnp->lock``.
+This detection is crucial because:
+
+1. The CPU might have gone offline after the snapshot but before the second loop
+2. The offline CPU cannot report its own QS if it's already dead
+3. Without this detection, the grace period would wait forever for CPUs that
+ are now offline.
+
+The second loop performs this detection safely::
+
+ rcu_for_each_node_breadth_first(rnp) {
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
+ rnp->qsmask = rnp->qsmaskinit; // Apply the snapshot
+
+ // Detect CPUs offline after snapshot
+ mask = rnp->qsmask & ~rnp->qsmaskinitnext;
+
+ if (mask && rcu_is_leaf_node(rnp))
+ rcu_report_qs_rnp(mask, ...) // Report QS for offline CPUs
+ }
+
+This approach ensures atomicity: quiescent state reporting for offline CPUs
+happens either in rcu_gp_init() (second loop) or in rcutree_report_cpu_dead(),
+never both and never neither. The ``rnp->lock`` held throughout the sequence
+prevents races - rcutree_report_cpu_dead() also acquires this lock when
+clearing ``qsmaskinitnext``, ensuring mutual exclusion.
+
Scheduler and RCU
~~~~~~~~~~~~~~~~~
@@ -2357,6 +2481,7 @@ section.
#. `Sched Flavor (Historical)`_
#. `Sleepable RCU`_
#. `Tasks RCU`_
+#. `Tasks Trace RCU`_
Bottom-Half Flavor (Historical)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -2512,15 +2637,16 @@ synchronize_srcu() for some other domain ``ss1``, and if an
that was held across as ``ss``-domain synchronize_srcu(), deadlock
would again be possible. Such a deadlock cycle could extend across an
arbitrarily large number of different SRCU domains. Again, with great
-power comes great responsibility.
+power comes great responsibility, though lockdep is now able to detect
+this sort of deadlock.
-Unlike the other RCU flavors, SRCU read-side critical sections can run
-on idle and even offline CPUs. This ability requires that
-srcu_read_lock() and srcu_read_unlock() contain memory barriers,
-which means that SRCU readers will run a bit slower than would RCU
-readers. It also motivates the smp_mb__after_srcu_read_unlock() API,
-which, in combination with srcu_read_unlock(), guarantees a full
-memory barrier.
+Unlike the other RCU flavors, SRCU read-side critical sections can run on
+idle and even offline CPUs, with the exception of srcu_read_lock_fast()
+and friends. This ability requires that srcu_read_lock() and
+srcu_read_unlock() contain memory barriers, which means that SRCU
+readers will run a bit slower than would RCU readers. It also motivates
+the smp_mb__after_srcu_read_unlock() API, which, in combination with
+srcu_read_unlock(), guarantees a full memory barrier.
Also unlike other RCU flavors, synchronize_srcu() may **not** be
invoked from CPU-hotplug notifiers, due to the fact that SRCU grace
@@ -2556,15 +2682,15 @@ run some tests first. SRCU just might need a few adjustment to deal with
that sort of load. Of course, your mileage may vary based on the speed
of your CPUs and the size of your memory.
-The `SRCU
-API <https://lwn.net/Articles/609973/#RCU%20Per-Flavor%20API%20Table>`__
+The `SRCU API
+<https://lwn.net/Articles/609973/#RCU%20Per-Flavor%20API%20Table>`__
includes srcu_read_lock(), srcu_read_unlock(),
-srcu_dereference(), srcu_dereference_check(),
-synchronize_srcu(), synchronize_srcu_expedited(),
-call_srcu(), srcu_barrier(), and srcu_read_lock_held(). It
-also includes DEFINE_SRCU(), DEFINE_STATIC_SRCU(), and
-init_srcu_struct() APIs for defining and initializing
-``srcu_struct`` structures.
+srcu_dereference(), srcu_dereference_check(), synchronize_srcu(),
+synchronize_srcu_expedited(), call_srcu(), srcu_barrier(),
+and srcu_read_lock_held(). It also includes DEFINE_SRCU(),
+DEFINE_STATIC_SRCU(), DEFINE_SRCU_FAST(), DEFINE_STATIC_SRCU_FAST(),
+init_srcu_struct(), and init_srcu_struct_fast() APIs for defining and
+initializing ``srcu_struct`` structures.
More recently, the SRCU API has added polling interfaces:
@@ -2610,6 +2736,16 @@ critical sections that are delimited by voluntary context switches, that
is, calls to schedule(), cond_resched(), and
synchronize_rcu_tasks(). In addition, transitions to and from
userspace execution also delimit tasks-RCU read-side critical sections.
+Idle tasks are ignored by Tasks RCU, and Tasks Rude RCU may be used to
+interact with them.
+
+Note well that involuntary context switches are *not* Tasks-RCU quiescent
+states. After all, in preemptible kernels, a task executing code in a
+trampoline might be preempted. In this case, the Tasks-RCU grace period
+clearly cannot end until that task resumes and its execution leaves that
+trampoline. This means, among other things, that cond_resched() does
+not provide a Tasks RCU quiescent state. (Instead, use rcu_softirq_qs()
+from softirq or rcu_tasks_classic_qs() otherwise.)
The tasks-RCU API is quite compact, consisting only of
call_rcu_tasks(), synchronize_rcu_tasks(), and
@@ -2632,9 +2768,13 @@ moniker. And this operation is considered to be quite rude by real-time
workloads that don't want their ``nohz_full`` CPUs receiving IPIs and
by battery-powered systems that don't want their idle CPUs to be awakened.
+Once kernel entry/exit and deep-idle functions have been properly tagged
+``noinstr``, Tasks RCU can start paying attention to idle tasks (except
+those that are idle from RCU's perspective) and then Tasks Rude RCU can
+be removed from the kernel.
+
The tasks-rude-RCU API is also reader-marking-free and thus quite compact,
-consisting of call_rcu_tasks_rude(), synchronize_rcu_tasks_rude(),
-and rcu_barrier_tasks_rude().
+consisting solely of synchronize_rcu_tasks_rude().
Tasks Trace RCU
~~~~~~~~~~~~~~~
diff --git a/Documentation/RCU/RTFP.txt b/Documentation/RCU/RTFP.txt
index db8f16b392aa..8d4e8de4c460 100644
--- a/Documentation/RCU/RTFP.txt
+++ b/Documentation/RCU/RTFP.txt
@@ -641,7 +641,7 @@ Orran Krieger and Rusty Russell and Dipankar Sarma and Maneesh Soni"
,Month="July"
,Year="2001"
,note="Available:
-\url{http://www.linuxsymposium.org/2001/abstracts/readcopy.php}
+\url{https://kernel.org/doc/ols/2001/read-copy.pdf}
\url{http://www.rdrop.com/users/paulmck/RCU/rclock_OLS.2001.05.01c.pdf}
[Viewed June 23, 2004]"
,annotation={
@@ -1480,7 +1480,7 @@ Suparna Bhattacharya"
,Year="2006"
,pages="v2 123-138"
,note="Available:
-\url{http://www.linuxsymposium.org/2006/view_abstract.php?content_key=184}
+\url{https://kernel.org/doc/ols/2006/ols2006v2-pages-131-146.pdf}
\url{http://www.rdrop.com/users/paulmck/RCU/OLSrtRCU.2006.08.11a.pdf}
[Viewed January 1, 2007]"
,annotation={
@@ -1511,7 +1511,7 @@ Canis Rufus and Zoicon5 and Anome and Hal Eisen"
,Year="2006"
,pages="v2 249-254"
,note="Available:
-\url{http://www.linuxsymposium.org/2006/view_abstract.php?content_key=184}
+\url{https://kernel.org/doc/ols/2006/ols2006v2-pages-249-262.pdf}
[Viewed January 11, 2009]"
,annotation={
Uses RCU-protected radix tree for a lockless page cache.
diff --git a/Documentation/RCU/checklist.rst b/Documentation/RCU/checklist.rst
index 2d42998a89a6..4b30f701225f 100644
--- a/Documentation/RCU/checklist.rst
+++ b/Documentation/RCU/checklist.rst
@@ -68,7 +68,14 @@ over a rather long period of time, but improvements are always welcome!
rcu_read_lock_sched(), or by the appropriate update-side lock.
Explicit disabling of preemption (preempt_disable(), for example)
can serve as rcu_read_lock_sched(), but is less readable and
- prevents lockdep from detecting locking issues.
+ prevents lockdep from detecting locking issues. Acquiring a
+ raw spinlock also enters an RCU read-side critical section.
+
+ The guard(rcu)() and scoped_guard(rcu) primitives designate
+ the remainder of the current scope or the next statement,
+ respectively, as the RCU read-side critical section. Use of
+ these guards can be less error-prone than rcu_read_lock(),
+ rcu_read_unlock(), and friends.
Please note that you *cannot* rely on code known to be built
only in non-preemptible kernels. Such code can and will break,
@@ -193,14 +200,13 @@ over a rather long period of time, but improvements are always welcome!
when publicizing a pointer to a structure that can
be traversed by an RCU read-side critical section.
-5. If any of call_rcu(), call_srcu(), call_rcu_tasks(),
- call_rcu_tasks_rude(), or call_rcu_tasks_trace() is used,
- the callback function may be invoked from softirq context,
- and in any case with bottom halves disabled. In particular,
- this callback function cannot block. If you need the callback
- to block, run that code in a workqueue handler scheduled from
- the callback. The queue_rcu_work() function does this for you
- in the case of call_rcu().
+5. If any of call_rcu(), call_srcu(), call_rcu_tasks(), or
+ call_rcu_tasks_trace() is used, the callback function may be
+ invoked from softirq context, and in any case with bottom halves
+ disabled. In particular, this callback function cannot block.
+ If you need the callback to block, run that code in a workqueue
+ handler scheduled from the callback. The queue_rcu_work()
+ function does this for you in the case of call_rcu().
6. Since synchronize_rcu() can block, it cannot be called
from any sort of irq context. The same rule applies
@@ -253,10 +259,10 @@ over a rather long period of time, but improvements are always welcome!
corresponding readers must use rcu_read_lock_trace()
and rcu_read_unlock_trace().
- c. If an updater uses call_rcu_tasks_rude() or
- synchronize_rcu_tasks_rude(), then the corresponding
- readers must use anything that disables preemption,
- for example, preempt_disable() and preempt_enable().
+ c. If an updater uses synchronize_rcu_tasks_rude(),
+ then the corresponding readers must use anything that
+ disables preemption, for example, preempt_disable()
+ and preempt_enable().
Mixing things up will result in confusion and broken kernels, and
has even resulted in an exploitable security issue. Therefore,
@@ -325,11 +331,9 @@ over a rather long period of time, but improvements are always welcome!
d. Periodically invoke rcu_barrier(), permitting a limited
number of updates per grace period.
- The same cautions apply to call_srcu(), call_rcu_tasks(),
- call_rcu_tasks_rude(), and call_rcu_tasks_trace(). This is
- why there is an srcu_barrier(), rcu_barrier_tasks(),
- rcu_barrier_tasks_rude(), and rcu_barrier_tasks_rude(),
- respectively.
+ The same cautions apply to call_srcu(), call_rcu_tasks(), and
+ call_rcu_tasks_trace(). This is why there is an srcu_barrier(),
+ rcu_barrier_tasks(), and rcu_barrier_tasks_trace(), respectively.
Note that although these primitives do take action to avoid
memory exhaustion when any given CPU has too many callbacks,
@@ -383,15 +387,16 @@ over a rather long period of time, but improvements are always welcome!
to safely access and/or modify that data structure.
Do not assume that RCU callbacks will be executed on the same
- CPU that executed the corresponding call_rcu() or call_srcu().
- For example, if a given CPU goes offline while having an RCU
- callback pending, then that RCU callback will execute on some
- surviving CPU. (If this was not the case, a self-spawning RCU
- callback would prevent the victim CPU from ever going offline.)
- Furthermore, CPUs designated by rcu_nocbs= might well *always*
- have their RCU callbacks executed on some other CPUs, in fact,
- for some real-time workloads, this is the whole point of using
- the rcu_nocbs= kernel boot parameter.
+ CPU that executed the corresponding call_rcu(), call_srcu(),
+ call_rcu_tasks(), or call_rcu_tasks_trace(). For example, if
+ a given CPU goes offline while having an RCU callback pending,
+ then that RCU callback will execute on some surviving CPU.
+ (If this was not the case, a self-spawning RCU callback would
+ prevent the victim CPU from ever going offline.) Furthermore,
+ CPUs designated by rcu_nocbs= might well *always* have their
+ RCU callbacks executed on some other CPUs, in fact, for some
+ real-time workloads, this is the whole point of using the
+ rcu_nocbs= kernel boot parameter.
In addition, do not assume that callbacks queued in a given order
will be invoked in that order, even if they all are queued on the
@@ -406,15 +411,19 @@ over a rather long period of time, but improvements are always welcome!
13. Unlike most flavors of RCU, it *is* permissible to block in an
SRCU read-side critical section (demarked by srcu_read_lock()
and srcu_read_unlock()), hence the "SRCU": "sleepable RCU".
- Please note that if you don't need to sleep in read-side critical
- sections, you should be using RCU rather than SRCU, because RCU
- is almost always faster and easier to use than is SRCU.
-
- Also unlike other forms of RCU, explicit initialization and
- cleanup is required either at build time via DEFINE_SRCU()
- or DEFINE_STATIC_SRCU() or at runtime via init_srcu_struct()
- and cleanup_srcu_struct(). These last two are passed a
- "struct srcu_struct" that defines the scope of a given
+ As with RCU, guard(srcu)() and scoped_guard(srcu) forms are
+ available, and often provide greater ease of use. Please note
+ that if you don't need to sleep in read-side critical sections,
+ you should be using RCU rather than SRCU, because RCU is almost
+ always faster and easier to use than is SRCU.
+
+ Also unlike other forms of RCU, explicit initialization
+ and cleanup is required either at build time via
+ DEFINE_SRCU(), DEFINE_STATIC_SRCU(), DEFINE_SRCU_FAST(),
+ or DEFINE_STATIC_SRCU_FAST() or at runtime via either
+ init_srcu_struct() or init_srcu_struct_fast() and
+ cleanup_srcu_struct(). These last three are passed a
+ `struct srcu_struct` that defines the scope of a given
SRCU domain. Once initialized, the srcu_struct is passed
to srcu_read_lock(), srcu_read_unlock() synchronize_srcu(),
synchronize_srcu_expedited(), and call_srcu(). A given
@@ -444,10 +453,13 @@ over a rather long period of time, but improvements are always welcome!
real-time workloads than is synchronize_rcu_expedited().
It is also permissible to sleep in RCU Tasks Trace read-side
- critical, which are delimited by rcu_read_lock_trace() and
- rcu_read_unlock_trace(). However, this is a specialized flavor
- of RCU, and you should not use it without first checking with
- its current users. In most cases, you should instead use SRCU.
+ critical section, which are delimited by rcu_read_lock_trace()
+ and rcu_read_unlock_trace(). However, this is a specialized
+ flavor of RCU, and you should not use it without first checking
+ with its current users. In most cases, you should instead
+ use SRCU. As with RCU and SRCU, guard(rcu_tasks_trace)() and
+ scoped_guard(rcu_tasks_trace) are available, and often provide
+ greater ease of use.
Note that rcu_assign_pointer() relates to SRCU just as it does to
other forms of RCU, but instead of rcu_dereference() you should
@@ -490,6 +502,12 @@ over a rather long period of time, but improvements are always welcome!
since the last time that you passed that same object to
call_rcu() (or friends).
+ CONFIG_RCU_STRICT_GRACE_PERIOD:
+ combine with KASAN to check for pointers leaked out
+ of RCU read-side critical sections. This Kconfig
+ option is tough on both performance and scalability,
+ and so is limited to four-CPU systems.
+
__rcu sparse checks:
tag the pointer to the RCU-protected data structure
with __rcu, and sparse will warn you if you access that
@@ -499,9 +517,9 @@ over a rather long period of time, but improvements are always welcome!
These debugging aids can help you find problems that are
otherwise extremely difficult to spot.
-17. If you pass a callback function defined within a module to one of
- call_rcu(), call_srcu(), call_rcu_tasks(), call_rcu_tasks_rude(),
- or call_rcu_tasks_trace(), then it is necessary to wait for all
+17. If you pass a callback function defined within a module
+ to one of call_rcu(), call_srcu(), call_rcu_tasks(), or
+ call_rcu_tasks_trace(), then it is necessary to wait for all
pending callbacks to be invoked before unloading that module.
Note that it is absolutely *not* sufficient to wait for a grace
period! For example, synchronize_rcu() implementation is *not*
@@ -514,7 +532,6 @@ over a rather long period of time, but improvements are always welcome!
- call_rcu() -> rcu_barrier()
- call_srcu() -> srcu_barrier()
- call_rcu_tasks() -> rcu_barrier_tasks()
- - call_rcu_tasks_rude() -> rcu_barrier_tasks_rude()
- call_rcu_tasks_trace() -> rcu_barrier_tasks_trace()
However, these barrier functions are absolutely *not* guaranteed
@@ -531,7 +548,6 @@ over a rather long period of time, but improvements are always welcome!
- Either synchronize_srcu() or synchronize_srcu_expedited(),
together with and srcu_barrier()
- synchronize_rcu_tasks() and rcu_barrier_tasks()
- - synchronize_tasks_rude() and rcu_barrier_tasks_rude()
- synchronize_tasks_trace() and rcu_barrier_tasks_trace()
If necessary, you can use something like workqueues to execute
diff --git a/Documentation/RCU/index.rst b/Documentation/RCU/index.rst
index 84a79903f6a8..ef26c78507d3 100644
--- a/Documentation/RCU/index.rst
+++ b/Documentation/RCU/index.rst
@@ -1,13 +1,13 @@
.. SPDX-License-Identifier: GPL-2.0
-.. _rcu_concepts:
+.. _rcu_handbook:
============
-RCU concepts
+RCU Handbook
============
.. toctree::
- :maxdepth: 3
+ :maxdepth: 2
checklist
lockdep
diff --git a/Documentation/RCU/listRCU.rst b/Documentation/RCU/listRCU.rst
index ed5c9d8c9afe..d8bb98623c12 100644
--- a/Documentation/RCU/listRCU.rst
+++ b/Documentation/RCU/listRCU.rst
@@ -334,7 +334,7 @@ If the system-call audit module were to ever need to reject stale data, one way
to accomplish this would be to add a ``deleted`` flag and a ``lock`` spinlock to the
``audit_entry`` structure, and modify audit_filter_task() as follows::
- static enum audit_state audit_filter_task(struct task_struct *tsk)
+ static struct audit_entry *audit_filter_task(struct task_struct *tsk, char **key)
{
struct audit_entry *e;
enum audit_state state;
@@ -346,16 +346,18 @@ to accomplish this would be to add a ``deleted`` flag and a ``lock`` spinlock to
if (e->deleted) {
spin_unlock(&e->lock);
rcu_read_unlock();
- return AUDIT_BUILD_CONTEXT;
+ return NULL;
}
rcu_read_unlock();
if (state == AUDIT_STATE_RECORD)
*key = kstrdup(e->rule.filterkey, GFP_ATOMIC);
- return state;
+ /* As long as e->lock is held, e is valid and
+ * its value is not stale */
+ return e;
}
}
rcu_read_unlock();
- return AUDIT_BUILD_CONTEXT;
+ return NULL;
}
The ``audit_del_rule()`` function would need to set the ``deleted`` flag under the
diff --git a/Documentation/RCU/lockdep.rst b/Documentation/RCU/lockdep.rst
index 69e73a39bd11..741b157bbacb 100644
--- a/Documentation/RCU/lockdep.rst
+++ b/Documentation/RCU/lockdep.rst
@@ -106,7 +106,7 @@ or the RCU-protected data that it points to can change concurrently.
Like rcu_dereference(), when lockdep is enabled, RCU list and hlist
traversal primitives check for being called from within an RCU read-side
critical section. However, a lockdep expression can be passed to them
-as a additional optional argument. With this lockdep expression, these
+as an additional optional argument. With this lockdep expression, these
traversal primitives will complain only if the lockdep expression is
false and they are called from outside any RCU read-side critical section.
diff --git a/Documentation/RCU/rcu_dereference.rst b/Documentation/RCU/rcu_dereference.rst
index 659d5913784d..2524dcdadde2 100644
--- a/Documentation/RCU/rcu_dereference.rst
+++ b/Documentation/RCU/rcu_dereference.rst
@@ -408,7 +408,10 @@ member of the rcu_dereference() to use in various situations:
RCU flavors, an RCU read-side critical section is entered
using rcu_read_lock(), anything that disables bottom halves,
anything that disables interrupts, or anything that disables
- preemption.
+ preemption. Please note that spinlock critical sections
+ are also implied RCU read-side critical sections, even when
+ they are preemptible, as they are in kernels built with
+ CONFIG_PREEMPT_RT=y.
2. If the access might be within an RCU read-side critical section
on the one hand, or protected by (say) my_lock on the other,
diff --git a/Documentation/RCU/rcubarrier.rst b/Documentation/RCU/rcubarrier.rst
index 6da7f66da2a8..12a7b059654f 100644
--- a/Documentation/RCU/rcubarrier.rst
+++ b/Documentation/RCU/rcubarrier.rst
@@ -329,10 +329,7 @@ Answer:
was first added back in 2005. This is because on_each_cpu()
disables preemption, which acted as an RCU read-side critical
section, thus preventing CPU 0's grace period from completing
- until on_each_cpu() had dealt with all of the CPUs. However,
- with the advent of preemptible RCU, rcu_barrier() no longer
- waited on nonpreemptible regions of code in preemptible kernels,
- that being the job of the new rcu_barrier_sched() function.
+ until on_each_cpu() had dealt with all of the CPUs.
However, with the RCU flavor consolidation around v4.20, this
possibility was once again ruled out, because the consolidated
diff --git a/Documentation/RCU/stallwarn.rst b/Documentation/RCU/stallwarn.rst
index ca7b7cd806a1..d7c8eff63317 100644
--- a/Documentation/RCU/stallwarn.rst
+++ b/Documentation/RCU/stallwarn.rst
@@ -96,6 +96,13 @@ warnings:
the ``rcu_.*timer wakeup didn't happen for`` console-log message,
which will include additional debugging information.
+- A timer issue causes time to appear to jump forward, so that RCU
+ believes that the RCU CPU stall-warning timeout has been exceeded
+ when in fact much less time has passed. This could be due to
+ timer hardware bugs, timer driver bugs, or even corruption of
+ the "jiffies" global variable. These sorts of timer hardware
+ and driver bugs are not uncommon when testing new hardware.
+
- A low-level kernel issue that either fails to invoke one of the
variants of rcu_eqs_enter(true), rcu_eqs_exit(true), ct_idle_enter(),
ct_idle_exit(), ct_irq_enter(), or ct_irq_exit() on the one
@@ -112,7 +119,7 @@ warnings:
uncommon in large datacenter. In one memorable case some decades
back, a CPU failed in a running system, becoming unresponsive,
but not causing an immediate crash. This resulted in a series
- of RCU CPU stall warnings, eventually leading the realization
+ of RCU CPU stall warnings, eventually leading to the realization
that the CPU had failed.
The RCU, RCU-sched, RCU-tasks, and RCU-tasks-trace implementations have
@@ -249,7 +256,7 @@ ticks this GP)" indicates that this CPU has not taken any scheduling-clock
interrupts during the current stalled grace period.
The "idle=" portion of the message prints the dyntick-idle state.
-The hex number before the first "/" is the low-order 12 bits of the
+The hex number before the first "/" is the low-order 16 bits of the
dynticks counter, which will have an even-numbered value if the CPU
is in dyntick-idle mode and an odd-numbered value otherwise. The hex
number between the two "/"s is the value of the nesting, which will be
diff --git a/Documentation/RCU/torture.rst b/Documentation/RCU/torture.rst
index 49e7beea6ae1..1ad5cc793811 100644
--- a/Documentation/RCU/torture.rst
+++ b/Documentation/RCU/torture.rst
@@ -318,7 +318,7 @@ Suppose that a previous kvm.sh run left its output in this directory::
tools/testing/selftests/rcutorture/res/2022.11.03-11.26.28
-Then this run can be re-run without rebuilding as follow:
+Then this run can be re-run without rebuilding as follow::
kvm-again.sh tools/testing/selftests/rcutorture/res/2022.11.03-11.26.28
@@ -344,7 +344,7 @@ painstaking and error-prone.
And this is why the kvm-remote.sh script exists.
-If you the following command works::
+If the following command works::
ssh system0 date
@@ -364,7 +364,7 @@ systems must come first.
The kvm.sh ``--dryrun scenarios`` argument is useful for working out
how many scenarios may be run in one batch across a group of systems.
-You can also re-run a previous remote run in a manner similar to kvm.sh:
+You can also re-run a previous remote run in a manner similar to kvm.sh::
kvm-remote.sh "system0 system1 system2 system3 system4 system5" \
tools/testing/selftests/rcutorture/res/2022.11.03-11.26.28-remote \
diff --git a/Documentation/RCU/whatisRCU.rst b/Documentation/RCU/whatisRCU.rst
index 60ce02475142..a1582bd653d1 100644
--- a/Documentation/RCU/whatisRCU.rst
+++ b/Documentation/RCU/whatisRCU.rst
@@ -15,6 +15,9 @@ to start learning about RCU:
| 2014 Big API Table https://lwn.net/Articles/609973/
| 6. The RCU API, 2019 Edition https://lwn.net/Articles/777036/
| 2019 Big API Table https://lwn.net/Articles/777165/
+| 7. The RCU API, 2024 Edition https://lwn.net/Articles/988638/
+| 2024 Background Information https://lwn.net/Articles/988641/
+| 2024 Big API Table https://lwn.net/Articles/988666/
For those preferring video:
@@ -172,14 +175,25 @@ rcu_read_lock()
critical section. Reference counts may be used in conjunction
with RCU to maintain longer-term references to data structures.
+ Note that anything that disables bottom halves, preemption,
+ or interrupts also enters an RCU read-side critical section.
+ Acquiring a spinlock also enters an RCU read-side critical
+ sections, even for spinlocks that do not disable preemption,
+ as is the case in kernels built with CONFIG_PREEMPT_RT=y.
+ Sleeplocks do *not* enter RCU read-side critical sections.
+
rcu_read_unlock()
^^^^^^^^^^^^^^^^^
void rcu_read_unlock(void);
This temporal primitives is used by a reader to inform the
reclaimer that the reader is exiting an RCU read-side critical
- section. Note that RCU read-side critical sections may be nested
- and/or overlapping.
+ section. Anything that enables bottom halves, preemption,
+ or interrupts also exits an RCU read-side critical section.
+ Releasing a spinlock also exits an RCU read-side critical section.
+
+ Note that RCU read-side critical sections may be nested and/or
+ overlapping.
synchronize_rcu()
^^^^^^^^^^^^^^^^^
@@ -239,21 +253,25 @@ rcu_assign_pointer()
^^^^^^^^^^^^^^^^^^^^
void rcu_assign_pointer(p, typeof(p) v);
- Yes, rcu_assign_pointer() **is** implemented as a macro, though it
- would be cool to be able to declare a function in this manner.
- (Compiler experts will no doubt disagree.)
+ Yes, rcu_assign_pointer() **is** implemented as a macro, though
+ it would be cool to be able to declare a function in this manner.
+ (And there has been some discussion of adding overloaded functions
+ to the C language, so who knows?)
The updater uses this spatial macro to assign a new value to an
RCU-protected pointer, in order to safely communicate the change
in value from the updater to the reader. This is a spatial (as
opposed to temporal) macro. It does not evaluate to an rvalue,
- but it does execute any memory-barrier instructions required
- for a given CPU architecture. Its ordering properties are that
- of a store-release operation.
-
- Perhaps just as important, it serves to document (1) which
- pointers are protected by RCU and (2) the point at which a
- given structure becomes accessible to other CPUs. That said,
+ but it does provide any compiler directives and memory-barrier
+ instructions required for a given compile or CPU architecture.
+ Its ordering properties are that of a store-release operation,
+ that is, any prior loads and stores required to initialize the
+ structure are ordered before the store that publishes the pointer
+ to that structure.
+
+ Perhaps just as important, rcu_assign_pointer() serves to document
+ (1) which pointers are protected by RCU and (2) the point at which
+ a given structure becomes accessible to other CPUs. That said,
rcu_assign_pointer() is most frequently used indirectly, via
the _rcu list-manipulation primitives such as list_add_rcu().
@@ -272,7 +290,11 @@ rcu_dereference()
executes any needed memory-barrier instructions for a given
CPU architecture. Currently, only Alpha needs memory barriers
within rcu_dereference() -- on other CPUs, it compiles to a
- volatile load.
+ volatile load. However, no mainstream C compilers respect
+ address dependencies, so rcu_dereference() uses volatile casts,
+ which, in combination with the coding guidelines listed in
+ rcu_dereference.rst, prevent current compilers from breaking
+ these dependencies.
Common coding practice uses rcu_dereference() to copy an
RCU-protected pointer to a local variable, then dereferences
@@ -416,7 +438,7 @@ their assorted primitives.
This section shows a simple use of the core RCU API to protect a
global pointer to a dynamically allocated structure. More-typical
-uses of RCU may be found in listRCU.rst, arrayRCU.rst, and NMI-RCU.rst.
+uses of RCU may be found in listRCU.rst and NMI-RCU.rst.
::
struct foo {
@@ -499,8 +521,8 @@ So, to sum up:
data item.
See checklist.rst for additional rules to follow when using RCU.
-And again, more-typical uses of RCU may be found in listRCU.rst,
-arrayRCU.rst, and NMI-RCU.rst.
+And again, more-typical uses of RCU may be found in listRCU.rst
+and NMI-RCU.rst.
.. _4_whatisRCU:
@@ -952,8 +974,18 @@ unfortunately any spinlock in a ``SLAB_TYPESAFE_BY_RCU`` object must be
initialized after each and every call to kmem_cache_alloc(), which renders
reference-free spinlock acquisition completely unsafe. Therefore, when
using ``SLAB_TYPESAFE_BY_RCU``, make proper use of a reference counter.
-(Those willing to use a kmem_cache constructor may also use locking,
-including cache-friendly sequence locking.)
+If using refcount_t, the specialized refcount_{add|inc}_not_zero_acquire()
+and refcount_set_release() APIs should be used to ensure correct operation
+ordering when verifying object identity and when initializing newly
+allocated objects. Acquire fence in refcount_{add|inc}_not_zero_acquire()
+ensures that identity checks happen *after* reference count is taken.
+refcount_set_release() should be called after a newly allocated object is
+fully initialized and release fence ensures that new values are visible
+*before* refcount can be successfully taken by other users. Once
+refcount_set_release() is called, the object should be considered visible
+by other tasks.
+(Those willing to initialize their locks in a kmem_cache constructor
+may also use locking, including cache-friendly sequence locking.)
With traditional reference counting -- such as that implemented by the
kref library in Linux -- there is typically code that runs when the last
@@ -989,32 +1021,41 @@ RCU list traversal::
list_entry_rcu
list_entry_lockless
list_first_entry_rcu
+ list_first_or_null_rcu
+ list_tail_rcu
list_next_rcu
+ list_next_or_null_rcu
list_for_each_entry_rcu
list_for_each_entry_continue_rcu
list_for_each_entry_from_rcu
- list_first_or_null_rcu
- list_next_or_null_rcu
+ list_for_each_entry_lockless
hlist_first_rcu
hlist_next_rcu
hlist_pprev_rcu
hlist_for_each_entry_rcu
+ hlist_for_each_entry_rcu_notrace
hlist_for_each_entry_rcu_bh
hlist_for_each_entry_from_rcu
hlist_for_each_entry_continue_rcu
hlist_for_each_entry_continue_rcu_bh
hlist_nulls_first_rcu
+ hlist_nulls_next_rcu
hlist_nulls_for_each_entry_rcu
+ hlist_nulls_for_each_entry_safe
hlist_bl_first_rcu
hlist_bl_for_each_entry_rcu
RCU pointer/list update::
rcu_assign_pointer
+ rcu_replace_pointer
+ INIT_LIST_HEAD_RCU
list_add_rcu
list_add_tail_rcu
list_del_rcu
list_replace_rcu
+ list_splice_init_rcu
+ list_splice_tail_init_rcu
hlist_add_behind_rcu
hlist_add_before_rcu
hlist_add_head_rcu
@@ -1022,34 +1063,53 @@ RCU pointer/list update::
hlist_del_rcu
hlist_del_init_rcu
hlist_replace_rcu
- list_splice_init_rcu
- list_splice_tail_init_rcu
hlist_nulls_del_init_rcu
hlist_nulls_del_rcu
hlist_nulls_add_head_rcu
+ hlist_nulls_add_tail_rcu
+ hlist_nulls_add_fake
+ hlists_swap_heads_rcu
hlist_bl_add_head_rcu
- hlist_bl_del_init_rcu
hlist_bl_del_rcu
hlist_bl_set_first_rcu
RCU::
- Critical sections Grace period Barrier
-
- rcu_read_lock synchronize_net rcu_barrier
- rcu_read_unlock synchronize_rcu
- rcu_dereference synchronize_rcu_expedited
- rcu_read_lock_held call_rcu
- rcu_dereference_check kfree_rcu
- rcu_dereference_protected
+ Critical sections Grace period Barrier
+
+ rcu_read_lock synchronize_net rcu_barrier
+ rcu_read_unlock synchronize_rcu
+ guard(rcu)() synchronize_rcu_expedited
+ scoped_guard(rcu) synchronize_rcu_mult
+ rcu_dereference call_rcu
+ rcu_dereference_check call_rcu_hurry
+ rcu_dereference_protected kfree_rcu
+ rcu_read_lock_held kvfree_rcu
+ rcu_read_lock_any_held kfree_rcu_mightsleep
+ rcu_pointer_handoff cond_synchronize_rcu
+ unrcu_pointer cond_synchronize_rcu_full
+ cond_synchronize_rcu_expedited
+ cond_synchronize_rcu_expedited_full
+ get_completed_synchronize_rcu
+ get_completed_synchronize_rcu_full
+ get_state_synchronize_rcu
+ get_state_synchronize_rcu_full
+ poll_state_synchronize_rcu
+ poll_state_synchronize_rcu_full
+ same_state_synchronize_rcu
+ same_state_synchronize_rcu_full
+ start_poll_synchronize_rcu
+ start_poll_synchronize_rcu_full
+ start_poll_synchronize_rcu_expedited
+ start_poll_synchronize_rcu_expedited_full
bh::
Critical sections Grace period Barrier
- rcu_read_lock_bh call_rcu rcu_barrier
- rcu_read_unlock_bh synchronize_rcu
- [local_bh_disable] synchronize_rcu_expedited
+ rcu_read_lock_bh [Same as RCU] [Same as RCU]
+ rcu_read_unlock_bh
+ [local_bh_disable]
[and friends]
rcu_dereference_bh
rcu_dereference_bh_check
@@ -1060,9 +1120,9 @@ sched::
Critical sections Grace period Barrier
- rcu_read_lock_sched call_rcu rcu_barrier
- rcu_read_unlock_sched synchronize_rcu
- [preempt_disable] synchronize_rcu_expedited
+ rcu_read_lock_sched [Same as RCU] [Same as RCU]
+ rcu_read_unlock_sched
+ [preempt_disable]
[and friends]
rcu_read_lock_sched_notrace
rcu_read_unlock_sched_notrace
@@ -1072,46 +1132,107 @@ sched::
rcu_read_lock_sched_held
+RCU: Initialization/cleanup/ordering::
+
+ RCU_INIT_POINTER
+ RCU_INITIALIZER
+ RCU_POINTER_INITIALIZER
+ init_rcu_head
+ destroy_rcu_head
+ init_rcu_head_on_stack
+ destroy_rcu_head_on_stack
+ SLAB_TYPESAFE_BY_RCU
+
+
+RCU: Quiescents states and control::
+
+ cond_resched_tasks_rcu_qs
+ rcu_all_qs
+ rcu_softirq_qs_periodic
+ rcu_end_inkernel_boot
+ rcu_expedite_gp
+ rcu_gp_is_expedited
+ rcu_unexpedite_gp
+ rcu_cpu_stall_reset
+ rcu_head_after_call_rcu
+ rcu_is_watching
+
+
+RCU-sync primitive::
+
+ rcu_sync_is_idle
+ rcu_sync_init
+ rcu_sync_enter
+ rcu_sync_exit
+ rcu_sync_dtor
+
+
RCU-Tasks::
- Critical sections Grace period Barrier
+ Critical sections Grace period Barrier
- N/A call_rcu_tasks rcu_barrier_tasks
+ N/A call_rcu_tasks rcu_barrier_tasks
synchronize_rcu_tasks
RCU-Tasks-Rude::
- Critical sections Grace period Barrier
+ Critical sections Grace period Barrier
- N/A call_rcu_tasks_rude rcu_barrier_tasks_rude
- synchronize_rcu_tasks_rude
+ N/A synchronize_rcu_tasks_rude rcu_barrier_tasks_rude
+ call_rcu_tasks_rude
RCU-Tasks-Trace::
- Critical sections Grace period Barrier
+ Critical sections Grace period Barrier
- rcu_read_lock_trace call_rcu_tasks_trace rcu_barrier_tasks_trace
+ rcu_read_lock_trace call_rcu_tasks_trace rcu_barrier_tasks_trace
rcu_read_unlock_trace synchronize_rcu_tasks_trace
+ guard(rcu_tasks_trace)()
+ scoped_guard(rcu_tasks_trace)
-SRCU::
+SRCU list traversal::
+ list_for_each_entry_srcu
+ hlist_for_each_entry_srcu
- Critical sections Grace period Barrier
- srcu_read_lock call_srcu srcu_barrier
- srcu_read_unlock synchronize_srcu
- srcu_dereference synchronize_srcu_expedited
+SRCU::
+
+ Critical sections Grace period Barrier
+
+ srcu_read_lock call_srcu srcu_barrier
+ srcu_read_unlock synchronize_srcu
+ srcu_read_lock_fast synchronize_srcu_expedited
+ srcu_read_unlock_fast get_state_synchronize_srcu
+ srcu_read_lock_nmisafe start_poll_synchronize_srcu
+ srcu_read_unlock_nmisafe start_poll_synchronize_srcu_expedited
+ srcu_read_lock_notrace poll_state_synchronize_srcu
+ srcu_read_unlock_notrace
+ srcu_down_read
+ srcu_up_read
+ srcu_down_read_fast
+ srcu_up_read_fast
+ guard(srcu)()
+ scoped_guard(srcu)
+ srcu_read_lock_held
+ srcu_dereference
srcu_dereference_check
+ srcu_dereference_notrace
srcu_read_lock_held
-SRCU: Initialization/cleanup::
+
+SRCU: Initialization/cleanup/ordering::
DEFINE_SRCU
DEFINE_STATIC_SRCU
+ DEFINE_SRCU_FAST // for srcu_read_lock_fast() and friends
+ DEFINE_STATIC_SRCU_FAST // for srcu_read_lock_fast() and friends
init_srcu_struct
+ init_srcu_struct_fast
cleanup_srcu_struct
+ smp_mb__after_srcu_read_unlock
All: lockdep-checked RCU utility APIs::
diff --git a/Documentation/accel/amdxdna/amdnpu.rst b/Documentation/accel/amdxdna/amdnpu.rst
new file mode 100644
index 000000000000..42e54904f9a8
--- /dev/null
+++ b/Documentation/accel/amdxdna/amdnpu.rst
@@ -0,0 +1,281 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+
+.. include:: <isonum.txt>
+
+=========
+ AMD NPU
+=========
+
+:Copyright: |copy| 2024 Advanced Micro Devices, Inc.
+:Author: Sonal Santan <sonal.santan@amd.com>
+
+Overview
+========
+
+AMD NPU (Neural Processing Unit) is a multi-user AI inference accelerator
+integrated into AMD client APU. NPU enables efficient execution of Machine
+Learning applications like CNN, LLM, etc. NPU is based on
+`AMD XDNA Architecture`_. NPU is managed by **amdxdna** driver.
+
+
+Hardware Description
+====================
+
+AMD NPU consists of the following hardware components:
+
+AMD XDNA Array
+--------------
+
+AMD XDNA Array comprises of 2D array of compute and memory tiles built with
+`AMD AI Engine Technology`_. Each column has 4 rows of compute tiles and 1
+row of memory tile. Each compute tile contains a VLIW processor with its own
+dedicated program and data memory. The memory tile acts as L2 memory. The 2D
+array can be partitioned at a column boundary creating a spatially isolated
+partition which can be bound to a workload context.
+
+Each column also has dedicated DMA engines to move data between host DDR and
+memory tile.
+
+AMD Phoenix and AMD Hawk Point client NPU have a 4x5 topology, i.e., 4 rows of
+compute tiles arranged into 5 columns. AMD Strix Point client APU have 4x8
+topology, i.e., 4 rows of compute tiles arranged into 8 columns.
+
+Shared L2 Memory
+----------------
+
+The single row of memory tiles create a pool of software managed on chip L2
+memory. DMA engines are used to move data between host DDR and memory tiles.
+AMD Phoenix and AMD Hawk Point NPUs have a total of 2560 KB of L2 memory.
+AMD Strix Point NPU has a total of 4096 KB of L2 memory.
+
+Microcontroller
+---------------
+
+A microcontroller runs NPU Firmware which is responsible for command processing,
+XDNA Array partition setup, XDNA Array configuration, workload context
+management and workload orchestration.
+
+NPU Firmware uses a dedicated instance of an isolated non-privileged context
+called ERT to service each workload context. ERT is also used to execute user
+provided ``ctrlcode`` associated with the workload context.
+
+NPU Firmware uses a single isolated privileged context called MERT to service
+management commands from the amdxdna driver.
+
+Mailboxes
+---------
+
+The microcontroller and amdxdna driver use a privileged channel for management
+tasks like setting up of contexts, telemetry, query, error handling, setting up
+user channel, etc. As mentioned before, privileged channel requests are
+serviced by MERT. The privileged channel is bound to a single mailbox.
+
+The microcontroller and amdxdna driver use a dedicated user channel per
+workload context. The user channel is primarily used for submitting work to
+the NPU. As mentioned before, a user channel requests are serviced by an
+instance of ERT. Each user channel is bound to its own dedicated mailbox.
+
+PCIe EP
+-------
+
+NPU is visible to the x86 host CPU as a PCIe device with multiple BARs and some
+MSI-X interrupt vectors. NPU uses a dedicated high bandwidth SoC level fabric
+for reading or writing into host memory. Each instance of ERT gets its own
+dedicated MSI-X interrupt. MERT gets a single instance of MSI-X interrupt.
+
+The number of PCIe BARs varies depending on the specific device. Based on their
+functions, PCIe BARs can generally be categorized into the following types.
+
+* PSP BAR: Expose the AMD PSP (Platform Security Processor) function
+* SMU BAR: Expose the AMD SMU (System Management Unit) function
+* SRAM BAR: Expose ring buffers for the mailbox
+* Mailbox BAR: Expose the mailbox control registers (head, tail and ISR
+ registers etc.)
+* Public Register BAR: Expose public registers
+
+On specific devices, the above-mentioned BAR type might be combined into a
+single physical PCIe BAR. Or a module might require two physical PCIe BARs to
+be fully functional. For example,
+
+* On AMD Phoenix device, PSP, SMU, Public Register BARs are on PCIe BAR index 0.
+* On AMD Strix Point device, Mailbox and Public Register BARs are on PCIe BAR
+ index 0. The PSP has some registers in PCIe BAR index 0 (Public Register BAR)
+ and PCIe BAR index 4 (PSP BAR).
+
+Process Isolation Hardware
+--------------------------
+
+As explained before, XDNA Array can be dynamically divided into isolated
+spatial partitions, each of which may have one or more columns. The spatial
+partition is setup by programming the column isolation registers by the
+microcontroller. Each spatial partition is associated with a PASID which is
+also programmed by the microcontroller. Hence multiple spatial partitions in
+the NPU can make concurrent host access protected by PASID.
+
+The NPU FW itself uses microcontroller MMU enforced isolated contexts for
+servicing user and privileged channel requests.
+
+
+Mixed Spatial and Temporal Scheduling
+=====================================
+
+AMD XDNA architecture supports mixed spatial and temporal (time sharing)
+scheduling of 2D array. This means that spatial partitions may be setup and
+torn down dynamically to accommodate various workloads. A *spatial* partition
+may be *exclusively* bound to one workload context while another partition may
+be *temporarily* bound to more than one workload contexts. The microcontroller
+updates the PASID for a temporarily shared partition to match the context that
+has been bound to the partition at any moment.
+
+Resource Solver
+---------------
+
+The Resource Solver component of the amdxdna driver manages the allocation
+of 2D array among various workloads. Every workload describes the number
+of columns required to run the NPU binary in its metadata. The Resource Solver
+component uses hints passed by the workload and its own heuristics to
+decide 2D array (re)partition strategy and mapping of workloads for spatial and
+temporal sharing of columns. The FW enforces the context-to-column(s) resource
+binding decisions made by the Resource Solver.
+
+AMD Phoenix and AMD Hawk Point client NPU can support 6 concurrent workload
+contexts. AMD Strix Point can support 16 concurrent workload contexts.
+
+
+Application Binaries
+====================
+
+A NPU application workload is comprised of two separate binaries which are
+generated by the NPU compiler.
+
+1. AMD XDNA Array overlay, which is used to configure a NPU spatial partition.
+ The overlay contains instructions for setting up the stream switch
+ configuration and ELF for the compute tiles. The overlay is loaded on the
+ spatial partition bound to the workload by the associated ERT instance.
+ Refer to the
+ `Versal Adaptive SoC AIE-ML Architecture Manual (AM020)`_ for more details.
+
+2. ``ctrlcode``, used for orchestrating the overlay loaded on the spatial
+ partition. ``ctrlcode`` is executed by the ERT running in protected mode on
+ the microcontroller in the context of the workload. ``ctrlcode`` is made up
+ of a sequence of opcodes named ``XAie_TxnOpcode``. Refer to the
+ `AI Engine Run Time`_ for more details.
+
+
+Special Host Buffers
+====================
+
+Per-context Instruction Buffer
+------------------------------
+
+Every workload context uses a host resident 64 MB buffer which is memory
+mapped into the ERT instance created to service the workload. The ``ctrlcode``
+used by the workload is copied into this special memory. This buffer is
+protected by PASID like all other input/output buffers used by that workload.
+Instruction buffer is also mapped into the user space of the workload.
+
+Global Privileged Buffer
+------------------------
+
+In addition, the driver also allocates a single buffer for maintenance tasks
+like recording errors from MERT. This global buffer uses the global IOMMU
+domain and is only accessible by MERT.
+
+
+High-level Use Flow
+===================
+
+Here are the steps to run a workload on AMD NPU:
+
+1. Compile the workload into an overlay and a ``ctrlcode`` binary.
+2. Userspace opens a context in the driver and provides the overlay.
+3. The driver checks with the Resource Solver for provisioning a set of columns
+ for the workload.
+4. The driver then asks MERT to create a context on the device with the desired
+ columns.
+5. MERT then creates an instance of ERT. MERT also maps the Instruction Buffer
+ into ERT memory.
+6. The userspace then copies the ``ctrlcode`` to the Instruction Buffer.
+7. Userspace then creates a command buffer with pointers to input, output, and
+ instruction buffer; it then submits command buffer with the driver and goes
+ to sleep waiting for completion.
+8. The driver sends the command over the Mailbox to ERT.
+9. ERT *executes* the ``ctrlcode`` in the instruction buffer.
+10. Execution of the ``ctrlcode`` kicks off DMAs to and from the host DDR while
+ AMD XDNA Array is running.
+11. When ERT reaches end of ``ctrlcode``, it raises an MSI-X to send completion
+ signal to the driver which then wakes up the waiting workload.
+
+
+Boot Flow
+=========
+
+amdxdna driver uses PSP to securely load signed NPU FW and kick off the boot
+of the NPU microcontroller. amdxdna driver then waits for the alive signal in
+a special location on BAR 0. The NPU is switched off during SoC suspend and
+turned on after resume where the NPU FW is reloaded, and the handshake is
+performed again.
+
+
+Userspace components
+====================
+
+Compiler
+--------
+
+Peano is an LLVM based open-source single core compiler for AMD XDNA Array
+compute tile. Peano is available at:
+https://github.com/Xilinx/llvm-aie
+
+IRON is an open-source array compiler for AMD XDNA Array based NPU which uses
+Peano underneath. IRON is available at:
+https://github.com/Xilinx/mlir-aie
+
+Usermode Driver (UMD)
+---------------------
+
+The open-source XRT runtime stack interfaces with amdxdna kernel driver. XRT
+can be found at:
+https://github.com/Xilinx/XRT
+
+The open-source XRT shim for NPU is can be found at:
+https://github.com/amd/xdna-driver
+
+
+DMA Operation
+=============
+
+DMA operation instructions are encoded in the ``ctrlcode`` as
+``XAIE_IO_BLOCKWRITE`` opcode. When ERT executes ``XAIE_IO_BLOCKWRITE``, DMA
+operations between host DDR and L2 memory are effected.
+
+
+Error Handling
+==============
+
+When MERT detects an error in AMD XDNA Array, it pauses execution for that
+workload context and sends an asynchronous message to the driver over the
+privileged channel. The driver then sends a buffer pointer to MERT to capture
+the register states for the partition bound to faulting workload context. The
+driver then decodes the error by reading the contents of the buffer pointer.
+
+
+Telemetry
+=========
+
+MERT can report various kinds of telemetry information like the following:
+
+* L1 interrupt counter
+* DMA counter
+* Deep Sleep counter
+* etc.
+
+
+References
+==========
+
+- `AMD XDNA Architecture <https://www.amd.com/en/technologies/xdna.html>`_
+- `AMD AI Engine Technology <https://www.xilinx.com/products/technology/ai-engine.html>`_
+- `Peano <https://github.com/Xilinx/llvm-aie>`_
+- `Versal Adaptive SoC AIE-ML Architecture Manual (AM020) <https://docs.amd.com/r/en-US/am020-versal-aie-ml>`_
+- `AI Engine Run Time <https://github.com/Xilinx/aie-rt/tree/release/main_aig>`_
diff --git a/Documentation/accel/amdxdna/index.rst b/Documentation/accel/amdxdna/index.rst
new file mode 100644
index 000000000000..38c16939f1fc
--- /dev/null
+++ b/Documentation/accel/amdxdna/index.rst
@@ -0,0 +1,11 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+
+=====================================
+ accel/amdxdna NPU driver
+=====================================
+
+The accel/amdxdna driver supports the AMD NPU (Neural Processing Unit).
+
+.. toctree::
+
+ amdnpu
diff --git a/Documentation/accel/index.rst b/Documentation/accel/index.rst
index e94a0160b6a0..d8fa332d60a8 100644
--- a/Documentation/accel/index.rst
+++ b/Documentation/accel/index.rst
@@ -8,7 +8,9 @@ Compute Accelerators
:maxdepth: 1
introduction
+ amdxdna/index
qaic/index
+ rocket/index
.. only:: subproject and html
diff --git a/Documentation/accel/qaic/aic080.rst b/Documentation/accel/qaic/aic080.rst
new file mode 100644
index 000000000000..d563771ea6ce
--- /dev/null
+++ b/Documentation/accel/qaic/aic080.rst
@@ -0,0 +1,14 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+
+===============================
+ Qualcomm Cloud AI 80 (AIC080)
+===============================
+
+Overview
+========
+
+The Qualcomm Cloud AI 80/AIC080 family of products are a derivative of AIC100.
+The number of NSPs and clock rates are reduced to fit within resource
+constrained solutions. The PCIe Product ID is 0xa080.
+
+As a derivative product, all AIC100 documentation applies.
diff --git a/Documentation/accel/qaic/aic100.rst b/Documentation/accel/qaic/aic100.rst
index 590dae77ea12..41331cf580b1 100644
--- a/Documentation/accel/qaic/aic100.rst
+++ b/Documentation/accel/qaic/aic100.rst
@@ -229,6 +229,8 @@ of the defined channels, and their uses.
| _PERIODIC | | | timestamps in the device side logs with|
| | | | the host time source. |
+----------------+---------+----------+----------------------------------------+
+| IPCR | 24 & 25 | AMSS | AF_QIPCRTR clients and servers. |
++----------------+---------+----------+----------------------------------------+
DMA Bridge
==========
@@ -485,8 +487,8 @@ one user crashes, the fallout of that should be limited to that workload and not
impact other workloads. SSR accomplishes this.
If a particular workload crashes, QSM notifies the host via the QAIC_SSR MHI
-channel. This notification identifies the workload by it's assigned DBC. A
-multi-stage recovery process is then used to cleanup both sides, and get the
+channel. This notification identifies the workload by its assigned DBC. A
+multi-stage recovery process is then used to cleanup both sides, and gets the
DBC/NSPs into a working state.
When SSR occurs, any state in the workload is lost. Any inputs that were in
@@ -494,6 +496,27 @@ process, or queued by not yet serviced, are lost. The loaded artifacts will
remain in on-card DDR, but the host will need to re-activate the workload if
it desires to recover the workload.
+When SSR occurs for a specific NSP, the assigned DBC goes through the
+following state transactions in order:
+
+DBC_STATE_BEFORE_SHUTDOWN
+ Indicates that the affected NSP was found in an unrecoverable error
+ condition.
+DBC_STATE_AFTER_SHUTDOWN
+ Indicates that the NSP is under reset.
+DBC_STATE_BEFORE_POWER_UP
+ Indicates that the NSP's debug information has been collected, and is
+ ready to be collected by the host (if desired). At that stage the NSP
+ is restarted by QSM.
+DBC_STATE_AFTER_POWER_UP
+ Indicates that the NSP has been restarted, fully operational and is
+ in idle state.
+
+SSR also has an optional crashdump collection feature. If enabled, the host can
+collect the memory dump for the crashed NSP and dump it to the user space via
+the dev_coredump subsystem. The host can also decline the crashdump collection
+request from the device.
+
Reliability, Accessibility, Serviceability (RAS)
================================================
diff --git a/Documentation/accel/qaic/index.rst b/Documentation/accel/qaic/index.rst
index ad19b88d1a66..967b9dd8bace 100644
--- a/Documentation/accel/qaic/index.rst
+++ b/Documentation/accel/qaic/index.rst
@@ -10,4 +10,5 @@ accelerator cards.
.. toctree::
qaic
+ aic080
aic100
diff --git a/Documentation/accel/qaic/qaic.rst b/Documentation/accel/qaic/qaic.rst
index efb7771273bb..ef27e262cb91 100644
--- a/Documentation/accel/qaic/qaic.rst
+++ b/Documentation/accel/qaic/qaic.rst
@@ -36,7 +36,7 @@ polling mode and reenables the IRQ line.
This mitigation in QAIC is very effective. The same lprnet usecase that
generates 100k IRQs per second (per /proc/interrupts) is reduced to roughly 64
IRQs over 5 minutes while keeping the host system stable, and having the same
-workload throughput performance (within run to run noise variation).
+workload throughput performance (within run-to-run noise variation).
Single MSI Mode
---------------
@@ -49,7 +49,7 @@ useful to be able to fall back to a single MSI when needed.
To support this fallback, we allow the case where only one MSI is able to be
allocated, and share that one MSI between MHI and the DBCs. The device detects
when only one MSI has been configured and directs the interrupts for the DBCs
-to the interrupt normally used for MHI. Unfortunately this means that the
+to the interrupt normally used for MHI. Unfortunately, this means that the
interrupt handlers for every DBC and MHI wake up for every interrupt that
arrives; however, the DBC threaded irq handlers only are started when work to be
done is detected (MHI will always start its threaded handler).
@@ -62,9 +62,9 @@ never disabled, allowing each new entry to the FIFO to trigger a new interrupt.
Neural Network Control (NNC) Protocol
=====================================
-The implementation of NNC is split between the KMD (QAIC) and UMD. In general
+The implementation of NNC is split between the KMD (QAIC) and UMD. In general,
QAIC understands how to encode/decode NNC wire protocol, and elements of the
-protocol which require kernel space knowledge to process (for example, mapping
+protocol which requires kernel space knowledge to process (for example, mapping
host memory to device IOVAs). QAIC understands the structure of a message, and
all of the transactions. QAIC does not understand commands (the payload of a
passthrough transaction).
@@ -93,7 +93,7 @@ commands (does not impact QAIC).
uAPI
====
-QAIC creates an accel device per phsyical PCIe device. This accel device exists
+QAIC creates an accel device per physical PCIe device. This accel device exists
for as long as the PCIe device is known to Linux.
The PCIe device may not be in the state to accept requests from userspace at
@@ -147,12 +147,6 @@ DRM_IOCTL_QAIC_PERF_STATS_BO
recent execution of a BO. This allows userspace to construct an end to end
timeline of the BO processing for a performance analysis.
-DRM_IOCTL_QAIC_PART_DEV
- This IOCTL allows userspace to request a duplicate "shadow device". This extra
- accelN device is associated with a specific partition of resources on the
- AIC100 device and can be used for limiting a process to some subset of
- resources.
-
DRM_IOCTL_QAIC_DETACH_SLICE_BO
This IOCTL allows userspace to remove the slicing information from a BO that
was originally provided by a call to DRM_IOCTL_QAIC_ATTACH_SLICE_BO. This
diff --git a/Documentation/accel/rocket/index.rst b/Documentation/accel/rocket/index.rst
new file mode 100644
index 000000000000..70f97bccf100
--- /dev/null
+++ b/Documentation/accel/rocket/index.rst
@@ -0,0 +1,19 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+
+=====================================
+ accel/rocket Rockchip NPU driver
+=====================================
+
+The accel/rocket driver supports the Neural Processing Units (NPUs) inside some
+Rockchip SoCs such as the RK3588. Rockchip calls it RKNN and sometimes RKNPU.
+
+The hardware is described in chapter 36 in the RK3588 TRM.
+
+This driver just powers the hardware on and off, allocates and maps buffers to
+the device and submits jobs to the frontend unit. Everything else is done in
+userspace, as a Gallium driver (also called rocket) that is part of the Mesa3D
+project.
+
+Hardware currently supported:
+
+* RK3588
diff --git a/Documentation/accounting/delay-accounting.rst b/Documentation/accounting/delay-accounting.rst
index f61c01fc376e..86d7902a657f 100644
--- a/Documentation/accounting/delay-accounting.rst
+++ b/Documentation/accounting/delay-accounting.rst
@@ -100,29 +100,29 @@ Get delays, since system boot, for pid 10::
# ./getdelays -d -p 10
(output similar to next case)
-Get sum of delays, since system boot, for all pids with tgid 5::
+Get sum and peak of delays, since system boot, for all pids with tgid 242::
- # ./getdelays -d -t 5
+ bash-4.4# ./getdelays -d -t 242
print delayacct stats ON
- TGID 5
-
-
- CPU count real total virtual total delay total delay average
- 8 7000000 6872122 3382277 0.423ms
- IO count delay total delay average
- 0 0 0.000ms
- SWAP count delay total delay average
- 0 0 0.000ms
- RECLAIM count delay total delay average
- 0 0 0.000ms
- THRASHING count delay total delay average
- 0 0 0.000ms
- COMPACT count delay total delay average
- 0 0 0.000ms
- WPCOPY count delay total delay average
- 0 0 0.000ms
- IRQ count delay total delay average
- 0 0 0.000ms
+ TGID 242
+
+
+ CPU count real total virtual total delay total delay average delay max delay min
+ 39 156000000 156576579 2111069 0.054ms 0.212296ms 0.031307ms
+ IO count delay total delay average delay max delay min
+ 0 0 0.000ms 0.000000ms 0.000000ms
+ SWAP count delay total delay average delay max delay min
+ 0 0 0.000ms 0.000000ms 0.000000ms
+ RECLAIM count delay total delay average delay max delay min
+ 0 0 0.000ms 0.000000ms 0.000000ms
+ THRASHING count delay total delay average delay max delay min
+ 0 0 0.000ms 0.000000ms 0.000000ms
+ COMPACT count delay total delay average delay max delay min
+ 0 0 0.000ms 0.000000ms 0.000000ms
+ WPCOPY count delay total delay average delay max delay min
+ 156 11215873 0.072ms 0.207403ms 0.033913ms
+ IRQ count delay total delay average delay max delay min
+ 0 0 0.000ms 0.000000ms 0.000000ms
Get IO accounting for pid 1, it works only with -p::
@@ -131,3 +131,84 @@ Get IO accounting for pid 1, it works only with -p::
linuxrc: read=65536, write=0, cancelled_write=0
The above command can be used with -v to get more debug information.
+
+After the system starts, use `delaytop` to get the system-wide delay information,
+which includes system-wide PSI information and Top-N high-latency tasks.
+Note: PSI support requires `CONFIG_PSI=y` and `psi=1` for full functionality.
+
+`delaytop` is an interactive tool for monitoring system pressure and task delays.
+It supports multiple sorting options, display modes, and real-time keyboard controls.
+
+Basic usage with default settings (sorts by CPU delay, shows top 20 tasks, refreshes every 2 seconds)::
+
+ bash# ./delaytop
+ System Pressure Information: (avg10/avg60vg300/total)
+ CPU some: 0.0%/ 0.0%/ 0.0%/ 106137(ms)
+ CPU full: 0.0%/ 0.0%/ 0.0%/ 0(ms)
+ Memory full: 0.0%/ 0.0%/ 0.0%/ 0(ms)
+ Memory some: 0.0%/ 0.0%/ 0.0%/ 0(ms)
+ IO full: 0.0%/ 0.0%/ 0.0%/ 2240(ms)
+ IO some: 0.0%/ 0.0%/ 0.0%/ 2783(ms)
+ IRQ full: 0.0%/ 0.0%/ 0.0%/ 0(ms)
+ [o]sort [M]memverbose [q]quit
+ Top 20 processes (sorted by cpu delay):
+ PID TGID COMMAND CPU(ms) IO(ms) IRQ(ms) MEM(ms)
+ ------------------------------------------------------------------------
+ 110 110 kworker/15:0H-s 27.91 0.00 0.00 0.00
+ 57 57 cpuhp/7 3.18 0.00 0.00 0.00
+ 99 99 cpuhp/14 2.97 0.00 0.00 0.00
+ 51 51 cpuhp/6 0.90 0.00 0.00 0.00
+ 44 44 kworker/4:0H-sy 0.80 0.00 0.00 0.00
+ 60 60 ksoftirqd/7 0.74 0.00 0.00 0.00
+ 76 76 idle_inject/10 0.31 0.00 0.00 0.00
+ 100 100 idle_inject/14 0.30 0.00 0.00 0.00
+ 1309 1309 systemsettings 0.29 0.00 0.00 0.00
+ 45 45 cpuhp/5 0.22 0.00 0.00 0.00
+ 63 63 cpuhp/8 0.20 0.00 0.00 0.00
+ 87 87 cpuhp/12 0.18 0.00 0.00 0.00
+ 93 93 cpuhp/13 0.17 0.00 0.00 0.00
+ 1265 1265 acpid 0.17 0.00 0.00 0.00
+ 1552 1552 sshd 0.17 0.00 0.00 0.00
+ 2584 2584 sddm-helper 0.16 0.00 0.00 0.00
+ 1284 1284 rtkit-daemon 0.15 0.00 0.00 0.00
+ 1326 1326 nde-netfilter 0.14 0.00 0.00 0.00
+ 27 27 cpuhp/2 0.13 0.00 0.00 0.00
+ 631 631 kworker/11:2-rc 0.11 0.00 0.00 0.00
+
+Interactive keyboard controls during runtime::
+
+ o - Select sort field (CPU, IO, IRQ, Memory, etc.)
+ M - Toggle display mode (Default/Memory Verbose)
+ q - Quit
+
+Available sort fields(use -s/--sort or interactive command)::
+
+ cpu(c) - CPU delay
+ blkio(i) - I/O delay
+ irq(q) - IRQ delay
+ mem(m) - Total memory delay
+ swapin(s) - Swapin delay (memory verbose mode only)
+ freepages(r) - Freepages reclaim delay (memory verbose mode only)
+ thrashing(t) - Thrashing delay (memory verbose mode only)
+ compact(p) - Compaction delay (memory verbose mode only)
+ wpcopy(w) - Write page copy delay (memory verbose mode only)
+
+Advanced usage examples::
+
+ # ./delaytop -s blkio
+ Sorted by IO delay
+
+ # ./delaytop -s mem -M
+ Sorted by memory delay in memory verbose mode
+
+ # ./delaytop -p pid
+ Print delayacct stats
+
+ # ./delaytop -P num
+ Display the top N tasks
+
+ # ./delaytop -n num
+ Set delaytop refresh frequency (num times)
+
+ # ./delaytop -d secs
+ Specify refresh interval as secs
diff --git a/Documentation/accounting/taskstats-struct.rst b/Documentation/accounting/taskstats-struct.rst
index ca90fd489c9a..acca51c34157 100644
--- a/Documentation/accounting/taskstats-struct.rst
+++ b/Documentation/accounting/taskstats-struct.rst
@@ -47,7 +47,7 @@ should not change the relative position of each field within the struct.
1) Common and basic accounting fields::
/* The version number of this struct. This field is always set to
- * TAKSTATS_VERSION, which is defined in <linux/taskstats.h>.
+ * TASKSTATS_VERSION, which is defined in <linux/taskstats.h>.
* Each time the struct is changed, the value should be incremented.
*/
__u16 version;
diff --git a/Documentation/accounting/taskstats.rst b/Documentation/accounting/taskstats.rst
index 2a28b7f55c10..173c1e7bf5ef 100644
--- a/Documentation/accounting/taskstats.rst
+++ b/Documentation/accounting/taskstats.rst
@@ -76,41 +76,43 @@ The messages are in the format::
The taskstats payload is one of the following three kinds:
1. Commands: Sent from user to kernel. Commands to get data on
-a pid/tgid consist of one attribute, of type TASKSTATS_CMD_ATTR_PID/TGID,
-containing a u32 pid or tgid in the attribute payload. The pid/tgid denotes
-the task/process for which userspace wants statistics.
-
-Commands to register/deregister interest in exit data from a set of cpus
-consist of one attribute, of type
-TASKSTATS_CMD_ATTR_REGISTER/DEREGISTER_CPUMASK and contain a cpumask in the
-attribute payload. The cpumask is specified as an ascii string of
-comma-separated cpu ranges e.g. to listen to exit data from cpus 1,2,3,5,7,8
-the cpumask would be "1-3,5,7-8". If userspace forgets to deregister interest
-in cpus before closing the listening socket, the kernel cleans up its interest
-set over time. However, for the sake of efficiency, an explicit deregistration
-is advisable.
+ a pid/tgid consist of one attribute, of type TASKSTATS_CMD_ATTR_PID/TGID,
+ containing a u32 pid or tgid in the attribute payload. The pid/tgid denotes
+ the task/process for which userspace wants statistics.
+
+ Commands to register/deregister interest in exit data from a set of cpus
+ consist of one attribute, of type
+ TASKSTATS_CMD_ATTR_REGISTER/DEREGISTER_CPUMASK and contain a cpumask in the
+ attribute payload. The cpumask is specified as an ascii string of
+ comma-separated cpu ranges e.g. to listen to exit data from cpus 1,2,3,5,7,8
+ the cpumask would be "1-3,5,7-8". If userspace forgets to deregister
+ interest in cpus before closing the listening socket, the kernel cleans up
+ its interest set over time. However, for the sake of efficiency, an explicit
+ deregistration is advisable.
2. Response for a command: sent from the kernel in response to a userspace
-command. The payload is a series of three attributes of type:
+ command. The payload is a series of three attributes of type:
-a) TASKSTATS_TYPE_AGGR_PID/TGID : attribute containing no payload but indicates
-a pid/tgid will be followed by some stats.
+ a) TASKSTATS_TYPE_AGGR_PID/TGID: attribute containing no payload but
+ indicates a pid/tgid will be followed by some stats.
-b) TASKSTATS_TYPE_PID/TGID: attribute whose payload is the pid/tgid whose stats
-are being returned.
+ b) TASKSTATS_TYPE_PID/TGID: attribute whose payload is the pid/tgid whose
+ stats are being returned.
-c) TASKSTATS_TYPE_STATS: attribute with a struct taskstats as payload. The
-same structure is used for both per-pid and per-tgid stats.
+ c) TASKSTATS_TYPE_STATS: attribute with a struct taskstats as payload. The
+ same structure is used for both per-pid and per-tgid stats.
3. New message sent by kernel whenever a task exits. The payload consists of a
series of attributes of the following type:
-a) TASKSTATS_TYPE_AGGR_PID: indicates next two attributes will be pid+stats
-b) TASKSTATS_TYPE_PID: contains exiting task's pid
-c) TASKSTATS_TYPE_STATS: contains the exiting task's per-pid stats
-d) TASKSTATS_TYPE_AGGR_TGID: indicates next two attributes will be tgid+stats
-e) TASKSTATS_TYPE_TGID: contains tgid of process to which task belongs
-f) TASKSTATS_TYPE_STATS: contains the per-tgid stats for exiting task's process
+ a) TASKSTATS_TYPE_AGGR_PID: indicates next two attributes will be pid+stats
+ b) TASKSTATS_TYPE_PID: contains exiting task's pid
+ c) TASKSTATS_TYPE_STATS: contains the exiting task's per-pid stats
+ d) TASKSTATS_TYPE_AGGR_TGID: indicates next two attributes will be
+ tgid+stats
+ e) TASKSTATS_TYPE_TGID: contains tgid of process to which task belongs
+ f) TASKSTATS_TYPE_STATS: contains the per-tgid stats for exiting task's
+ process
per-tgid stats
diff --git a/Documentation/admin-guide/LSM/SELinux.rst b/Documentation/admin-guide/LSM/SELinux.rst
index 520a1c2c6fd2..cdd65164ca96 100644
--- a/Documentation/admin-guide/LSM/SELinux.rst
+++ b/Documentation/admin-guide/LSM/SELinux.rst
@@ -2,6 +2,17 @@
SELinux
=======
+Information about the SELinux kernel subsystem can be found at the
+following links:
+
+ https://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/selinux.git/tree/README.md
+
+ https://github.com/selinuxproject/selinux-kernel/wiki
+
+Information about the SELinux userspace can be found at:
+
+ https://github.com/SELinuxProject/selinux/wiki
+
If you want to use SELinux, chances are you will want
to use the distro-provided policies, or install the
latest reference policy release from
diff --git a/Documentation/admin-guide/LSM/SafeSetID.rst b/Documentation/admin-guide/LSM/SafeSetID.rst
index 0ec34863c674..6d439c987563 100644
--- a/Documentation/admin-guide/LSM/SafeSetID.rst
+++ b/Documentation/admin-guide/LSM/SafeSetID.rst
@@ -41,7 +41,7 @@ namespace). The higher level goal is to allow for uid-based sandboxing of system
services without having to give out CAP_SETUID all over the place just so that
non-root programs can drop to even-lesser-privileged uids. This is especially
relevant when one non-root daemon on the system should be allowed to spawn other
-processes as different uids, but its undesirable to give the daemon a
+processes as different uids, but it's undesirable to give the daemon a
basically-root-equivalent CAP_SETUID.
diff --git a/Documentation/admin-guide/LSM/Smack.rst b/Documentation/admin-guide/LSM/Smack.rst
index 6d44f4fdbf59..c5ed775f2d10 100644
--- a/Documentation/admin-guide/LSM/Smack.rst
+++ b/Documentation/admin-guide/LSM/Smack.rst
@@ -601,10 +601,15 @@ specification.
Task Attribute
~~~~~~~~~~~~~~
-The Smack label of a process can be read from /proc/<pid>/attr/current. A
-process can read its own Smack label from /proc/self/attr/current. A
+The Smack label of a process can be read from ``/proc/<pid>/attr/current``. A
+process can read its own Smack label from ``/proc/self/attr/current``. A
privileged process can change its own Smack label by writing to
-/proc/self/attr/current but not the label of another process.
+``/proc/self/attr/current`` but not the label of another process.
+
+Format of writing is : only the label or the label followed by one of the
+3 trailers: ``\n`` (by common agreement for ``/proc/...`` interfaces),
+``\0`` (because some applications incorrectly include it),
+``\n\0`` (because we think some applications may incorrectly include it).
File Attribute
~~~~~~~~~~~~~~
@@ -696,6 +701,11 @@ sockets.
A privileged program may set this to match the label of another
task with which it hopes to communicate.
+UNIX domain socket (UDS) with a BSD address functions both as a file in a
+filesystem and as a socket. As a file, it carries the SMACK64 attribute. This
+attribute is not involved in Smack security enforcement and is immutably
+assigned the label "*".
+
Smack Netlabel Exceptions
~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/Documentation/admin-guide/LSM/apparmor.rst b/Documentation/admin-guide/LSM/apparmor.rst
index 6cf81bbd7ce8..47939ee89d74 100644
--- a/Documentation/admin-guide/LSM/apparmor.rst
+++ b/Documentation/admin-guide/LSM/apparmor.rst
@@ -18,8 +18,11 @@ set ``CONFIG_SECURITY_APPARMOR=y``
If AppArmor should be selected as the default security module then set::
- CONFIG_DEFAULT_SECURITY="apparmor"
- CONFIG_SECURITY_APPARMOR_BOOTPARAM_VALUE=1
+ CONFIG_DEFAULT_SECURITY_APPARMOR=y
+
+The CONFIG_LSM parameter manages the order and selection of LSMs.
+Specify apparmor as the first "major" module (e.g. AppArmor, SELinux, Smack)
+in the list.
Build the kernel
diff --git a/Documentation/admin-guide/LSM/index.rst b/Documentation/admin-guide/LSM/index.rst
index a6ba95fbaa9f..b44ef68f6e4d 100644
--- a/Documentation/admin-guide/LSM/index.rst
+++ b/Documentation/admin-guide/LSM/index.rst
@@ -47,3 +47,5 @@ subdirectories.
tomoyo
Yama
SafeSetID
+ ipe
+ landlock
diff --git a/Documentation/admin-guide/LSM/ipe.rst b/Documentation/admin-guide/LSM/ipe.rst
new file mode 100644
index 000000000000..a756d8158531
--- /dev/null
+++ b/Documentation/admin-guide/LSM/ipe.rst
@@ -0,0 +1,835 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Integrity Policy Enforcement (IPE)
+==================================
+
+.. NOTE::
+
+ This is the documentation for admins, system builders, or individuals
+ attempting to use IPE. If you're looking for more developer-focused
+ documentation about IPE please see :doc:`the design docs </security/ipe>`.
+
+Overview
+--------
+
+Integrity Policy Enforcement (IPE) is a Linux Security Module that takes a
+complementary approach to access control. Unlike traditional access control
+mechanisms that rely on labels and paths for decision-making, IPE focuses
+on the immutable security properties inherent to system components. These
+properties are fundamental attributes or features of a system component
+that cannot be altered, ensuring a consistent and reliable basis for
+security decisions.
+
+To elaborate, in the context of IPE, system components primarily refer to
+files or the devices these files reside on. However, this is just a
+starting point. The concept of system components is flexible and can be
+extended to include new elements as the system evolves. The immutable
+properties include the origin of a file, which remains constant and
+unchangeable over time. For example, IPE policies can be crafted to trust
+files originating from the initramfs. Since initramfs is typically verified
+by the bootloader, its files are deemed trustworthy; "file is from
+initramfs" becomes an immutable property under IPE's consideration.
+
+The immutable property concept extends to the security features enabled on
+a file's origin, such as dm-verity or fs-verity, which provide a layer of
+integrity and trust. For example, IPE allows the definition of policies
+that trust files from a dm-verity protected device. dm-verity ensures the
+integrity of an entire device by providing a verifiable and immutable state
+of its contents. Similarly, fs-verity offers filesystem-level integrity
+checks, allowing IPE to enforce policies that trust files protected by
+fs-verity. These two features cannot be turned off once established, so
+they are considered immutable properties. These examples demonstrate how
+IPE leverages immutable properties, such as a file's origin and its
+integrity protection mechanisms, to make access control decisions.
+
+For the IPE policy, specifically, it grants the ability to enforce
+stringent access controls by assessing security properties against
+reference values defined within the policy. This assessment can be based on
+the existence of a security property (e.g., verifying if a file originates
+from initramfs) or evaluating the internal state of an immutable security
+property. The latter includes checking the roothash of a dm-verity
+protected device, determining whether dm-verity possesses a valid
+signature, assessing the digest of a fs-verity protected file, or
+determining whether fs-verity possesses a valid built-in signature. This
+nuanced approach to policy enforcement enables a highly secure and
+customizable system defense mechanism, tailored to specific security
+requirements and trust models.
+
+To enable IPE, ensure that ``CONFIG_SECURITY_IPE`` (under
+:menuselection:`Security -> Integrity Policy Enforcement (IPE)`) config
+option is enabled.
+
+Use Cases
+---------
+
+IPE works best in fixed-function devices: devices in which their purpose
+is clearly defined and not supposed to be changed (e.g. network firewall
+device in a data center, an IoT device, etcetera), where all software and
+configuration is built and provisioned by the system owner.
+
+IPE is a long-way off for use in general-purpose computing: the Linux
+community as a whole tends to follow a decentralized trust model (known as
+the web of trust), which IPE has no support for it yet. Instead, IPE
+supports PKI (public key infrastructure), which generally designates a
+set of trusted entities that provide a measure of absolute trust.
+
+Additionally, while most packages are signed today, the files inside
+the packages (for instance, the executables), tend to be unsigned. This
+makes it difficult to utilize IPE in systems where a package manager is
+expected to be functional, without major changes to the package manager
+and ecosystem behind it.
+
+The digest_cache LSM [#digest_cache_lsm]_ is a system that when combined with IPE,
+could be used to enable and support general-purpose computing use cases.
+
+Known Limitations
+-----------------
+
+IPE cannot verify the integrity of anonymous executable memory, such as
+the trampolines created by gcc closures and libffi (<3.4.2), or JIT'd code.
+Unfortunately, as this is dynamically generated code, there is no way
+for IPE to ensure the integrity of this code to form a trust basis.
+
+IPE cannot verify the integrity of programs written in interpreted
+languages when these scripts are invoked by passing these program files
+to the interpreter. This is because the way interpreters execute these
+files; the scripts themselves are not evaluated as executable code
+through one of IPE's hooks, but they are merely text files that are read
+(as opposed to compiled executables). However, with the introduction of the
+``AT_EXECVE_CHECK`` flag (:doc:`AT_EXECVE_CHECK </userspace-api/check_exec>`),
+interpreters can use it to signal the kernel that a script file will be executed,
+and request the kernel to perform LSM security checks on it.
+
+IPE's EXECUTE operation enforcement differs between compiled executables and
+interpreted scripts: For compiled executables, enforcement is triggered
+automatically by the kernel during ``execve()``, ``execveat()``, ``mmap()``
+and ``mprotect()`` syscalls when loading executable content. For interpreted
+scripts, enforcement requires explicit interpreter integration using
+``execveat()`` with ``AT_EXECVE_CHECK`` flag. Unlike exec syscalls that IPE
+intercepts during the execution process, this mechanism needs the interpreter
+to take the initiative, and existing interpreters won't be automatically
+supported unless the signal call is added.
+
+Threat Model
+------------
+
+IPE specifically targets the risk of tampering with user-space executable
+code after the kernel has initially booted, including the kernel modules
+loaded from userspace via ``modprobe`` or ``insmod``.
+
+To illustrate, consider a scenario where an untrusted binary, possibly
+malicious, is downloaded along with all necessary dependencies, including a
+loader and libc. The primary function of IPE in this context is to prevent
+the execution of such binaries and their dependencies.
+
+IPE achieves this by verifying the integrity and authenticity of all
+executable code before allowing them to run. It conducts a thorough
+check to ensure that the code's integrity is intact and that they match an
+authorized reference value (digest, signature, etc) as per the defined
+policy. If a binary does not pass this verification process, either
+because its integrity has been compromised or it does not meet the
+authorization criteria, IPE will deny its execution. Additionally, IPE
+generates audit logs which may be utilized to detect and analyze failures
+resulting from policy violation.
+
+Tampering threat scenarios include modification or replacement of
+executable code by a range of actors including:
+
+- Actors with physical access to the hardware
+- Actors with local network access to the system
+- Actors with access to the deployment system
+- Compromised internal systems under external control
+- Malicious end users of the system
+- Compromised end users of the system
+- Remote (external) compromise of the system
+
+IPE does not mitigate threats arising from malicious but authorized
+developers (with access to a signing certificate), or compromised
+developer tools used by them (i.e. return-oriented programming attacks).
+Additionally, IPE draws hard security boundary between userspace and
+kernelspace. As a result, kernel-level exploits are considered outside
+the scope of IPE and mitigation is left to other mechanisms.
+
+Policy
+------
+
+IPE policy is a plain-text [#devdoc]_ policy composed of multiple statements
+over several lines. There is one required line, at the top of the
+policy, indicating the policy name, and the policy version, for
+instance::
+
+ policy_name=Ex_Policy policy_version=0.0.0
+
+The policy name is a unique key identifying this policy in a human
+readable name. This is used to create nodes under securityfs as well as
+uniquely identify policies to deploy new policies vs update existing
+policies.
+
+The policy version indicates the current version of the policy (NOT the
+policy syntax version). This is used to prevent rollback of policy to
+potentially insecure previous versions of the policy.
+
+The next portion of IPE policy are rules. Rules are formed by key=value
+pairs, known as properties. IPE rules require two properties: ``action``,
+which determines what IPE does when it encounters a match against the
+rule, and ``op``, which determines when the rule should be evaluated.
+The ordering is significant, a rule must start with ``op``, and end with
+``action``. Thus, a minimal rule is::
+
+ op=EXECUTE action=ALLOW
+
+This example will allow any execution. Additional properties are used to
+assess immutable security properties about the files being evaluated.
+These properties are intended to be descriptions of systems within the
+kernel that can provide a measure of integrity verification, such that IPE
+can determine the trust of the resource based on the value of the property.
+
+Rules are evaluated top-to-bottom. As a result, any revocation rules,
+or denies should be placed early in the file to ensure that these rules
+are evaluated before a rule with ``action=ALLOW``.
+
+IPE policy supports comments. The character '#' will function as a
+comment, ignoring all characters to the right of '#' until the newline.
+
+The default behavior of IPE evaluations can also be expressed in policy,
+through the ``DEFAULT`` statement. This can be done at a global level,
+or a per-operation level::
+
+ # Global
+ DEFAULT action=ALLOW
+
+ # Operation Specific
+ DEFAULT op=EXECUTE action=ALLOW
+
+A default must be set for all known operations in IPE. If you want to
+preserve older policies being compatible with newer kernels that can introduce
+new operations, set a global default of ``ALLOW``, then override the
+defaults on a per-operation basis (as above).
+
+With configurable policy-based LSMs, there's several issues with
+enforcing the configurable policies at startup, around reading and
+parsing the policy:
+
+1. The kernel *should* not read files from userspace, so directly reading
+ the policy file is prohibited.
+2. The kernel command line has a character limit, and one kernel module
+ should not reserve the entire character limit for its own
+ configuration.
+3. There are various boot loaders in the kernel ecosystem, so handing
+ off a memory block would be costly to maintain.
+
+As a result, IPE has addressed this problem through a concept of a "boot
+policy". A boot policy is a minimal policy which is compiled into the
+kernel. This policy is intended to get the system to a state where
+userspace is set up and ready to receive commands, at which point a more
+complex policy can be deployed via securityfs. The boot policy can be
+specified via ``SECURITY_IPE_BOOT_POLICY`` config option, which accepts
+a path to a plain-text version of the IPE policy to apply. This policy
+will be compiled into the kernel. If not specified, IPE will be disabled
+until a policy is deployed and activated through securityfs.
+
+Deploying Policies
+~~~~~~~~~~~~~~~~~~
+
+Policies can be deployed from userspace through securityfs. These policies
+are signed through the PKCS#7 message format to enforce some level of
+authorization of the policies (prohibiting an attacker from gaining
+unconstrained root, and deploying an "allow all" policy). These
+policies must be signed by a certificate that chains to the
+``SYSTEM_TRUSTED_KEYRING``, or to the secondary and/or platform keyrings if
+``CONFIG_IPE_POLICY_SIG_SECONDARY_KEYRING`` and/or
+``CONFIG_IPE_POLICY_SIG_PLATFORM_KEYRING`` are enabled, respectively.
+With openssl, the policy can be signed by::
+
+ openssl smime -sign \
+ -in "$MY_POLICY" \
+ -signer "$MY_CERTIFICATE" \
+ -inkey "$MY_PRIVATE_KEY" \
+ -noattr \
+ -nodetach \
+ -nosmimecap \
+ -outform der \
+ -out "$MY_POLICY.p7b"
+
+Deploying the policies is done through securityfs, through the
+``new_policy`` node. To deploy a policy, simply cat the file into the
+securityfs node::
+
+ cat "$MY_POLICY.p7b" > /sys/kernel/security/ipe/new_policy
+
+Upon success, this will create one subdirectory under
+``/sys/kernel/security/ipe/policies/``. The subdirectory will be the
+``policy_name`` field of the policy deployed, so for the example above,
+the directory will be ``/sys/kernel/security/ipe/policies/Ex_Policy``.
+Within this directory, there will be seven files: ``pkcs7``, ``policy``,
+``name``, ``version``, ``active``, ``update``, and ``delete``.
+
+The ``pkcs7`` file is read-only. Reading it returns the raw PKCS#7 data
+that was provided to the kernel, representing the policy. If the policy being
+read is the boot policy, this will return ``ENOENT``, as it is not signed.
+
+The ``policy`` file is read only. Reading it returns the PKCS#7 inner
+content of the policy, which will be the plain text policy.
+
+The ``active`` file is used to set a policy as the currently active policy.
+This file is rw, and accepts a value of ``"1"`` to set the policy as active.
+Since only a single policy can be active at one time, all other policies
+will be marked inactive. The policy being marked active must have a policy
+version greater or equal to the currently-running version.
+
+The ``update`` file is used to update a policy that is already present
+in the kernel. This file is write-only and accepts a PKCS#7 signed
+policy. Two checks will always be performed on this policy: First, the
+``policy_names`` must match with the updated version and the existing
+version. Second the updated policy must have a policy version greater than
+the currently-running version. This is to prevent rollback attacks.
+
+The ``delete`` file is used to remove a policy that is no longer needed.
+This file is write-only and accepts a value of ``1`` to delete the policy.
+On deletion, the securityfs node representing the policy will be removed.
+However, delete the current active policy is not allowed and will return
+an operation not permitted error.
+
+Similarly, writing to both ``update`` and ``new_policy`` could result in
+bad message(policy syntax error) or file exists error. The latter error happens
+when trying to deploy a policy with a ``policy_name`` while the kernel already
+has a deployed policy with the same ``policy_name``.
+
+Deploying a policy will *not* cause IPE to start enforcing the policy. IPE will
+only enforce the policy marked active. Note that only one policy can be active
+at a time.
+
+Once deployment is successful, the policy can be activated, by writing file
+``/sys/kernel/security/ipe/policies/$policy_name/active``.
+For example, the ``Ex_Policy`` can be activated by::
+
+ echo 1 > "/sys/kernel/security/ipe/policies/Ex_Policy/active"
+
+From above point on, ``Ex_Policy`` is now the enforced policy on the
+system.
+
+IPE also provides a way to delete policies. This can be done via the
+``delete`` securityfs node,
+``/sys/kernel/security/ipe/policies/$policy_name/delete``.
+Writing ``1`` to that file deletes the policy::
+
+ echo 1 > "/sys/kernel/security/ipe/policies/$policy_name/delete"
+
+There is only one requirement to delete a policy: the policy being deleted
+must be inactive.
+
+.. NOTE::
+
+ If a traditional MAC system is enabled (SELinux, apparmor, smack), all
+ writes to ipe's securityfs nodes require ``CAP_MAC_ADMIN``.
+
+Modes
+~~~~~
+
+IPE supports two modes of operation: permissive (similar to SELinux's
+permissive mode) and enforced. In permissive mode, all events are
+checked and policy violations are logged, but the policy is not really
+enforced. This allows users to test policies before enforcing them.
+
+The default mode is enforce, and can be changed via the kernel command
+line parameter ``ipe.enforce=(0|1)``, or the securityfs node
+``/sys/kernel/security/ipe/enforce``.
+
+.. NOTE::
+
+ If a traditional MAC system is enabled (SELinux, apparmor, smack, etcetera),
+ all writes to ipe's securityfs nodes require ``CAP_MAC_ADMIN``.
+
+Audit Events
+~~~~~~~~~~~~
+
+1420 AUDIT_IPE_ACCESS
+^^^^^^^^^^^^^^^^^^^^^
+Event Examples::
+
+ type=1420 audit(1653364370.067:61): ipe_op=EXECUTE ipe_hook=MMAP enforcing=1 pid=2241 comm="ld-linux.so" path="/deny/lib/libc.so.6" dev="sda2" ino=14549020 rule="DEFAULT action=DENY"
+ type=1300 audit(1653364370.067:61): SYSCALL arch=c000003e syscall=9 success=no exit=-13 a0=7f1105a28000 a1=195000 a2=5 a3=812 items=0 ppid=2219 pid=2241 auid=0 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0 tty=pts0 ses=2 comm="ld-linux.so" exe="/tmp/ipe-test/lib/ld-linux.so" subj=unconfined key=(null)
+ type=1327 audit(1653364370.067:61): 707974686F6E3300746573742F6D61696E2E7079002D6E00
+
+ type=1420 audit(1653364735.161:64): ipe_op=EXECUTE ipe_hook=MMAP enforcing=1 pid=2472 comm="mmap_test" path=? dev=? ino=? rule="DEFAULT action=DENY"
+ type=1300 audit(1653364735.161:64): SYSCALL arch=c000003e syscall=9 success=no exit=-13 a0=0 a1=1000 a2=4 a3=21 items=0 ppid=2219 pid=2472 auid=0 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0 tty=pts0 ses=2 comm="mmap_test" exe="/root/overlake_test/upstream_test/vol_fsverity/bin/mmap_test" subj=unconfined key=(null)
+ type=1327 audit(1653364735.161:64): 707974686F6E3300746573742F6D61696E2E7079002D6E00
+
+This event indicates that IPE made an access control decision; the IPE
+specific record (1420) is always emitted in conjunction with a
+``AUDITSYSCALL`` record.
+
+Determining whether IPE is in permissive or enforced mode can be derived
+from ``success`` property and exit code of the ``AUDITSYSCALL`` record.
+
+
+Field descriptions:
+
++-----------+------------+-----------+---------------------------------------------------------------------------------+
+| Field | Value Type | Optional? | Description of Value |
++===========+============+===========+=================================================================================+
+| ipe_op | string | No | The IPE operation name associated with the log |
++-----------+------------+-----------+---------------------------------------------------------------------------------+
+| ipe_hook | string | No | The name of the LSM hook that triggered the IPE event |
++-----------+------------+-----------+---------------------------------------------------------------------------------+
+| enforcing | integer | No | The current IPE enforcing state 1 is in enforcing mode, 0 is in permissive mode |
++-----------+------------+-----------+---------------------------------------------------------------------------------+
+| pid | integer | No | The pid of the process that triggered the IPE event. |
++-----------+------------+-----------+---------------------------------------------------------------------------------+
+| comm | string | No | The command line program name of the process that triggered the IPE event |
++-----------+------------+-----------+---------------------------------------------------------------------------------+
+| path | string | Yes | The absolute path to the evaluated file |
++-----------+------------+-----------+---------------------------------------------------------------------------------+
+| ino | integer | Yes | The inode number of the evaluated file |
++-----------+------------+-----------+---------------------------------------------------------------------------------+
+| dev | string | Yes | The device name of the evaluated file, e.g. vda |
++-----------+------------+-----------+---------------------------------------------------------------------------------+
+| rule | string | No | The matched policy rule |
++-----------+------------+-----------+---------------------------------------------------------------------------------+
+
+1421 AUDIT_IPE_CONFIG_CHANGE
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Event Example::
+
+ type=1421 audit(1653425583.136:54): old_active_pol_name="Allow_All" old_active_pol_version=0.0.0 old_policy_digest=sha256:E3B0C44298FC1C149AFBF4C8996FB92427AE41E4649B934CA495991B7852B855 new_active_pol_name="boot_verified" new_active_pol_version=0.0.0 new_policy_digest=sha256:820EEA5B40CA42B51F68962354BA083122A20BB846F26765076DD8EED7B8F4DB auid=4294967295 ses=4294967295 lsm=ipe res=1
+ type=1300 audit(1653425583.136:54): SYSCALL arch=c000003e syscall=1 success=yes exit=2 a0=3 a1=5596fcae1fb0 a2=2 a3=2 items=0 ppid=184 pid=229 auid=4294967295 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0 tty=pts0 ses=4294967295 comm="python3" exe="/usr/bin/python3.10" key=(null)
+ type=1327 audit(1653425583.136:54): PROCTITLE proctitle=707974686F6E3300746573742F6D61696E2E7079002D66002E2
+
+This event indicates that IPE switched the active poliy from one to another
+along with the version and the hash digest of the two policies.
+Note IPE can only have one policy active at a time, all access decision
+evaluation is based on the current active policy.
+The normal procedure to deploy a new policy is loading the policy to deploy
+into the kernel first, then switch the active policy to it.
+
+This record will always be emitted in conjunction with a ``AUDITSYSCALL`` record for the ``write`` syscall.
+
+Field descriptions:
+
++------------------------+------------+-----------+---------------------------------------------------+
+| Field | Value Type | Optional? | Description of Value |
++========================+============+===========+===================================================+
+| old_active_pol_name | string | Yes | The name of previous active policy |
++------------------------+------------+-----------+---------------------------------------------------+
+| old_active_pol_version | string | Yes | The version of previous active policy |
++------------------------+------------+-----------+---------------------------------------------------+
+| old_policy_digest | string | Yes | The hash of previous active policy |
++------------------------+------------+-----------+---------------------------------------------------+
+| new_active_pol_name | string | No | The name of current active policy |
++------------------------+------------+-----------+---------------------------------------------------+
+| new_active_pol_version | string | No | The version of current active policy |
++------------------------+------------+-----------+---------------------------------------------------+
+| new_policy_digest | string | No | The hash of current active policy |
++------------------------+------------+-----------+---------------------------------------------------+
+| auid | integer | No | The login user ID |
++------------------------+------------+-----------+---------------------------------------------------+
+| ses | integer | No | The login session ID |
++------------------------+------------+-----------+---------------------------------------------------+
+| lsm | string | No | The lsm name associated with the event |
++------------------------+------------+-----------+---------------------------------------------------+
+| res | integer | No | The result of the audited operation(success/fail) |
++------------------------+------------+-----------+---------------------------------------------------+
+
+1422 AUDIT_IPE_POLICY_LOAD
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Event Example::
+
+ type=1422 audit(1653425529.927:53): policy_name="boot_verified" policy_version=0.0.0 policy_digest=sha256:820EEA5B40CA42B51F68962354BA083122A20BB846F26765076DD8EED7B8F4DB auid=4294967295 ses=4294967295 lsm=ipe res=1 errno=0
+ type=1300 audit(1653425529.927:53): arch=c000003e syscall=1 success=yes exit=2567 a0=3 a1=5596fcae1fb0 a2=a07 a3=2 items=0 ppid=184 pid=229 auid=4294967295 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0 tty=pts0 ses=4294967295 comm="python3" exe="/usr/bin/python3.10" key=(null)
+ type=1327 audit(1653425529.927:53): PROCTITLE proctitle=707974686F6E3300746573742F6D61696E2E7079002D66002E2E
+
+This record indicates a new policy has been loaded into the kernel with the policy name, policy version and policy hash.
+
+This record will always be emitted in conjunction with a ``AUDITSYSCALL`` record for the ``write`` syscall.
+
+Field descriptions:
+
++----------------+------------+-----------+-------------------------------------------------------------+
+| Field | Value Type | Optional? | Description of Value |
++================+============+===========+=============================================================+
+| policy_name | string | Yes | The policy_name |
++----------------+------------+-----------+-------------------------------------------------------------+
+| policy_version | string | Yes | The policy_version |
++----------------+------------+-----------+-------------------------------------------------------------+
+| policy_digest | string | Yes | The policy hash |
++----------------+------------+-----------+-------------------------------------------------------------+
+| auid | integer | No | The login user ID |
++----------------+------------+-----------+-------------------------------------------------------------+
+| ses | integer | No | The login session ID |
++----------------+------------+-----------+-------------------------------------------------------------+
+| lsm | string | No | The lsm name associated with the event |
++----------------+------------+-----------+-------------------------------------------------------------+
+| res | integer | No | The result of the audited operation(success/fail) |
++----------------+------------+-----------+-------------------------------------------------------------+
+| errno | integer | No | Error code from policy loading operations (see table below) |
++----------------+------------+-----------+-------------------------------------------------------------+
+
+Policy error codes (errno):
+
+The following table lists the error codes that may appear in the errno field while loading or updating the policy:
+
++----------------+--------------------------------------------------------+
+| Error Code | Description |
++================+========================================================+
+| 0 | Success |
++----------------+--------------------------------------------------------+
+| -EPERM | Insufficient permission |
++----------------+--------------------------------------------------------+
+| -EEXIST | Same name policy already deployed |
++----------------+--------------------------------------------------------+
+| -EBADMSG | Policy is invalid |
++----------------+--------------------------------------------------------+
+| -ENOMEM | Out of memory (OOM) |
++----------------+--------------------------------------------------------+
+| -ERANGE | Policy version number overflow |
++----------------+--------------------------------------------------------+
+| -EINVAL | Policy version parsing error |
++----------------+--------------------------------------------------------+
+| -ENOKEY | Key used to sign the IPE policy not found in keyring |
++----------------+--------------------------------------------------------+
+| -EKEYREJECTED | Policy signature verification failed |
++----------------+--------------------------------------------------------+
+| -ESTALE | Attempting to update an IPE policy with older version |
++----------------+--------------------------------------------------------+
+| -ENOENT | Policy was deleted while updating |
++----------------+--------------------------------------------------------+
+
+1404 AUDIT_MAC_STATUS
+^^^^^^^^^^^^^^^^^^^^^
+
+Event Examples::
+
+ type=1404 audit(1653425689.008:55): enforcing=0 old_enforcing=1 auid=4294967295 ses=4294967295 enabled=1 old-enabled=1 lsm=ipe res=1
+ type=1300 audit(1653425689.008:55): arch=c000003e syscall=1 success=yes exit=2 a0=1 a1=55c1065e5c60 a2=2 a3=0 items=0 ppid=405 pid=441 auid=0 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=)
+ type=1327 audit(1653425689.008:55): proctitle="-bash"
+
+ type=1404 audit(1653425689.008:55): enforcing=1 old_enforcing=0 auid=4294967295 ses=4294967295 enabled=1 old-enabled=1 lsm=ipe res=1
+ type=1300 audit(1653425689.008:55): arch=c000003e syscall=1 success=yes exit=2 a0=1 a1=55c1065e5c60 a2=2 a3=0 items=0 ppid=405 pid=441 auid=0 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=)
+ type=1327 audit(1653425689.008:55): proctitle="-bash"
+
+This record will always be emitted in conjunction with a ``AUDITSYSCALL`` record for the ``write`` syscall.
+
+Field descriptions:
+
++---------------+------------+-----------+-------------------------------------------------------------------------------------------------+
+| Field | Value Type | Optional? | Description of Value |
++===============+============+===========+=================================================================================================+
+| enforcing | integer | No | The enforcing state IPE is being switched to, 1 is in enforcing mode, 0 is in permissive mode |
++---------------+------------+-----------+-------------------------------------------------------------------------------------------------+
+| old_enforcing | integer | No | The enforcing state IPE is being switched from, 1 is in enforcing mode, 0 is in permissive mode |
++---------------+------------+-----------+-------------------------------------------------------------------------------------------------+
+| auid | integer | No | The login user ID |
++---------------+------------+-----------+-------------------------------------------------------------------------------------------------+
+| ses | integer | No | The login session ID |
++---------------+------------+-----------+-------------------------------------------------------------------------------------------------+
+| enabled | integer | No | The new TTY audit enabled setting |
++---------------+------------+-----------+-------------------------------------------------------------------------------------------------+
+| old-enabled | integer | No | The old TTY audit enabled setting |
++---------------+------------+-----------+-------------------------------------------------------------------------------------------------+
+| lsm | string | No | The lsm name associated with the event |
++---------------+------------+-----------+-------------------------------------------------------------------------------------------------+
+| res | integer | No | The result of the audited operation(success/fail) |
++---------------+------------+-----------+-------------------------------------------------------------------------------------------------+
+
+
+Success Auditing
+^^^^^^^^^^^^^^^^
+
+IPE supports success auditing. When enabled, all events that pass IPE
+policy and are not blocked will emit an audit event. This is disabled by
+default, and can be enabled via the kernel command line
+``ipe.success_audit=(0|1)`` or
+``/sys/kernel/security/ipe/success_audit`` securityfs file.
+
+This is *very* noisy, as IPE will check every userspace binary on the
+system, but is useful for debugging policies.
+
+.. NOTE::
+
+ If a traditional MAC system is enabled (SELinux, apparmor, smack, etcetera),
+ all writes to ipe's securityfs nodes require ``CAP_MAC_ADMIN``.
+
+Properties
+----------
+
+As explained above, IPE properties are ``key=value`` pairs expressed in IPE
+policy. Two properties are built-into the policy parser: 'op' and 'action'.
+The other properties are used to restrict immutable security properties
+about the files being evaluated. Currently those properties are:
+'``boot_verified``', '``dmverity_signature``', '``dmverity_roothash``',
+'``fsverity_signature``', '``fsverity_digest``'. A description of all
+properties supported by IPE are listed below:
+
+op
+~~
+
+Indicates the operation for a rule to apply to. Must be in every rule,
+as the first token. IPE supports the following operations:
+
+ ``EXECUTE``
+
+ Pertains to any file attempting to be executed, or loaded as an
+ executable.
+
+ ``FIRMWARE``:
+
+ Pertains to firmware being loaded via the firmware_class interface.
+ This covers both the preallocated buffer and the firmware file
+ itself.
+
+ ``KMODULE``:
+
+ Pertains to loading kernel modules via ``modprobe`` or ``insmod``.
+
+ ``KEXEC_IMAGE``:
+
+ Pertains to kernel images loading via ``kexec``.
+
+ ``KEXEC_INITRAMFS``
+
+ Pertains to initrd images loading via ``kexec --initrd``.
+
+ ``POLICY``:
+
+ Controls loading policies via reading a kernel-space initiated read.
+
+ An example of such is loading IMA policies by writing the path
+ to the policy file to ``$securityfs/ima/policy``
+
+ ``X509_CERT``:
+
+ Controls loading IMA certificates through the Kconfigs,
+ ``CONFIG_IMA_X509_PATH`` and ``CONFIG_EVM_X509_PATH``.
+
+action
+~~~~~~
+
+ Determines what IPE should do when a rule matches. Must be in every
+ rule, as the final clause. Can be one of:
+
+ ``ALLOW``:
+
+ If the rule matches, explicitly allow access to the resource to proceed
+ without executing any more rules.
+
+ ``DENY``:
+
+ If the rule matches, explicitly prohibit access to the resource to
+ proceed without executing any more rules.
+
+boot_verified
+~~~~~~~~~~~~~
+
+ This property can be utilized for authorization of files from initramfs.
+ The format of this property is::
+
+ boot_verified=(TRUE|FALSE)
+
+
+ .. WARNING::
+
+ This property will trust files from initramfs(rootfs). It should
+ only be used during early booting stage. Before mounting the real
+ rootfs on top of the initramfs, initramfs script will recursively
+ remove all files and directories on the initramfs. This is typically
+ implemented by using switch_root(8) [#switch_root]_. Therefore the
+ initramfs will be empty and not accessible after the real
+ rootfs takes over. It is advised to switch to a different policy
+ that doesn't rely on the property after this point.
+ This ensures that the trust policies remain relevant and effective
+ throughout the system's operation.
+
+dmverity_roothash
+~~~~~~~~~~~~~~~~~
+
+ This property can be utilized for authorization or revocation of
+ specific dm-verity volumes, identified via their root hashes. It has a
+ dependency on the DM_VERITY module. This property is controlled by
+ the ``IPE_PROP_DM_VERITY`` config option, it will be automatically
+ selected when ``SECURITY_IPE`` and ``DM_VERITY`` are all enabled.
+ The format of this property is::
+
+ dmverity_roothash=DigestName:HexadecimalString
+
+ The supported DigestNames for dmverity_roothash are [#dmveritydigests]_
+
+ + blake2b-512
+ + blake2s-256
+ + sha256
+ + sha384
+ + sha512
+ + sha3-224
+ + sha3-256
+ + sha3-384
+ + sha3-512
+ + sm3
+ + rmd160
+
+dmverity_signature
+~~~~~~~~~~~~~~~~~~
+
+ This property can be utilized for authorization of all dm-verity
+ volumes that have a signed roothash that validated by a keyring
+ specified by dm-verity's configuration, either the system trusted
+ keyring, or the secondary keyring. It depends on
+ ``DM_VERITY_VERIFY_ROOTHASH_SIG`` config option and is controlled by
+ the ``IPE_PROP_DM_VERITY_SIGNATURE`` config option, it will be automatically
+ selected when ``SECURITY_IPE``, ``DM_VERITY`` and
+ ``DM_VERITY_VERIFY_ROOTHASH_SIG`` are all enabled.
+ The format of this property is::
+
+ dmverity_signature=(TRUE|FALSE)
+
+fsverity_digest
+~~~~~~~~~~~~~~~
+
+ This property can be utilized for authorization of specific fsverity
+ enabled files, identified via their fsverity digests.
+ It depends on ``FS_VERITY`` config option and is controlled by
+ the ``IPE_PROP_FS_VERITY`` config option, it will be automatically
+ selected when ``SECURITY_IPE`` and ``FS_VERITY`` are all enabled.
+ The format of this property is::
+
+ fsverity_digest=DigestName:HexadecimalString
+
+ The supported DigestNames for fsverity_digest are [#fsveritydigest]_
+
+ + sha256
+ + sha512
+
+fsverity_signature
+~~~~~~~~~~~~~~~~~~
+
+ This property is used to authorize all fs-verity enabled files that have
+ been verified by fs-verity's built-in signature mechanism. The signature
+ verification relies on a key stored within the ".fs-verity" keyring. It
+ depends on ``FS_VERITY_BUILTIN_SIGNATURES`` config option and
+ it is controlled by the ``IPE_PROP_FS_VERITY`` config option,
+ it will be automatically selected when ``SECURITY_IPE``, ``FS_VERITY``
+ and ``FS_VERITY_BUILTIN_SIGNATURES`` are all enabled.
+ The format of this property is::
+
+ fsverity_signature=(TRUE|FALSE)
+
+Policy Examples
+---------------
+
+Allow all
+~~~~~~~~~
+
+::
+
+ policy_name=Allow_All policy_version=0.0.0
+ DEFAULT action=ALLOW
+
+Allow only initramfs
+~~~~~~~~~~~~~~~~~~~~
+
+::
+
+ policy_name=Allow_Initramfs policy_version=0.0.0
+ DEFAULT action=DENY
+
+ op=EXECUTE boot_verified=TRUE action=ALLOW
+
+Allow any signed and validated dm-verity volume and the initramfs
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+::
+
+ policy_name=Allow_Signed_DMV_And_Initramfs policy_version=0.0.0
+ DEFAULT action=DENY
+
+ op=EXECUTE boot_verified=TRUE action=ALLOW
+ op=EXECUTE dmverity_signature=TRUE action=ALLOW
+
+Prohibit execution from a specific dm-verity volume
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+::
+
+ policy_name=Deny_DMV_By_Roothash policy_version=0.0.0
+ DEFAULT action=DENY
+
+ op=EXECUTE dmverity_roothash=sha256:cd2c5bae7c6c579edaae4353049d58eb5f2e8be0244bf05345bc8e5ed257baff action=DENY
+
+ op=EXECUTE boot_verified=TRUE action=ALLOW
+ op=EXECUTE dmverity_signature=TRUE action=ALLOW
+
+Allow only a specific dm-verity volume
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+::
+
+ policy_name=Allow_DMV_By_Roothash policy_version=0.0.0
+ DEFAULT action=DENY
+
+ op=EXECUTE dmverity_roothash=sha256:401fcec5944823ae12f62726e8184407a5fa9599783f030dec146938 action=ALLOW
+
+Allow any fs-verity file with a valid built-in signature
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+::
+
+ policy_name=Allow_Signed_And_Validated_FSVerity policy_version=0.0.0
+ DEFAULT action=DENY
+
+ op=EXECUTE fsverity_signature=TRUE action=ALLOW
+
+Allow execution of a specific fs-verity file
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+::
+
+ policy_name=ALLOW_FSV_By_Digest policy_version=0.0.0
+ DEFAULT action=DENY
+
+ op=EXECUTE fsverity_digest=sha256:fd88f2b8824e197f850bf4c5109bea5cf0ee38104f710843bb72da796ba5af9e action=ALLOW
+
+Additional Information
+----------------------
+
+- `Github Repository <https://github.com/microsoft/ipe>`_
+- :doc:`Developer and design docs for IPE </security/ipe>`
+
+FAQ
+---
+
+Q:
+ What's the difference between other LSMs which provide a measure of
+ trust-based access control?
+
+A:
+
+ In general, there's two other LSMs that can provide similar functionality:
+ IMA, and Loadpin.
+
+ IMA and IPE are functionally very similar. The significant difference between
+ the two is the policy. [#devdoc]_
+
+ Loadpin and IPE differ fairly dramatically, as Loadpin only covers the IPE's
+ kernel read operations, whereas IPE is capable of controlling execution
+ on top of kernel read. The trust model is also different; Loadpin roots its
+ trust in the initial super-block, whereas trust in IPE is stemmed from kernel
+ itself (via ``SYSTEM_TRUSTED_KEYS``).
+
+-----------
+
+.. [#digest_cache_lsm] https://lore.kernel.org/lkml/20240415142436.2545003-1-roberto.sassu@huaweicloud.com/
+
+.. [#devdoc] Please see :doc:`the design docs </security/ipe>` for more on
+ this topic.
+
+.. [#switch_root] https://man7.org/linux/man-pages/man8/switch_root.8.html
+
+.. [#dmveritydigests] These hash algorithms are based on values accepted by
+ the Linux crypto API; IPE does not impose any
+ restrictions on the digest algorithm itself;
+ thus, this list may be out of date.
+
+.. [#fsveritydigest] These hash algorithms are based on values accepted by the
+ kernel's fsverity support; IPE does not impose any
+ restrictions on the digest algorithm itself;
+ thus, this list may be out of date.
diff --git a/Documentation/admin-guide/LSM/landlock.rst b/Documentation/admin-guide/LSM/landlock.rst
new file mode 100644
index 000000000000..9e61607def08
--- /dev/null
+++ b/Documentation/admin-guide/LSM/landlock.rst
@@ -0,0 +1,158 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. Copyright © 2025 Microsoft Corporation
+
+================================
+Landlock: system-wide management
+================================
+
+:Author: Mickaël Salaün
+:Date: March 2025
+
+Landlock can leverage the audit framework to log events.
+
+User space documentation can be found here:
+Documentation/userspace-api/landlock.rst.
+
+Audit
+=====
+
+Denied access requests are logged by default for a sandboxed program if `audit`
+is enabled. This default behavior can be changed with the
+sys_landlock_restrict_self() flags (cf.
+Documentation/userspace-api/landlock.rst). Landlock logs can also be masked
+thanks to audit rules. Landlock can generate 2 audit record types.
+
+Record types
+------------
+
+AUDIT_LANDLOCK_ACCESS
+ This record type identifies a denied access request to a kernel resource.
+ The ``domain`` field indicates the ID of the domain that blocked the
+ request. The ``blockers`` field indicates the cause(s) of this denial
+ (separated by a comma), and the following fields identify the kernel object
+ (similar to SELinux). There may be more than one of this record type per
+ audit event.
+
+ Example with a file link request generating two records in the same event::
+
+ domain=195ba459b blockers=fs.refer path="/usr/bin" dev="vda2" ino=351
+ domain=195ba459b blockers=fs.make_reg,fs.refer path="/usr/local" dev="vda2" ino=365
+
+AUDIT_LANDLOCK_DOMAIN
+ This record type describes the status of a Landlock domain. The ``status``
+ field can be either ``allocated`` or ``deallocated``.
+
+ The ``allocated`` status is part of the same audit event and follows
+ the first logged ``AUDIT_LANDLOCK_ACCESS`` record of a domain. It identifies
+ Landlock domain information at the time of the sys_landlock_restrict_self()
+ call with the following fields:
+
+ - the ``domain`` ID
+ - the enforcement ``mode``
+ - the domain creator's ``pid``
+ - the domain creator's ``uid``
+ - the domain creator's executable path (``exe``)
+ - the domain creator's command line (``comm``)
+
+ Example::
+
+ domain=195ba459b status=allocated mode=enforcing pid=300 uid=0 exe="/root/sandboxer" comm="sandboxer"
+
+ The ``deallocated`` status is an event on its own and it identifies a
+ Landlock domain release. After such event, it is guarantee that the
+ related domain ID will never be reused during the lifetime of the system.
+ The ``domain`` field indicates the ID of the domain which is released, and
+ the ``denials`` field indicates the total number of denied access request,
+ which might not have been logged according to the audit rules and
+ sys_landlock_restrict_self()'s flags.
+
+ Example::
+
+ domain=195ba459b status=deallocated denials=3
+
+
+Event samples
+--------------
+
+Here are two examples of log events (see serial numbers).
+
+In this example a sandboxed program (``kill``) tries to send a signal to the
+init process, which is denied because of the signal scoping restriction
+(``LL_SCOPED=s``)::
+
+ $ LL_FS_RO=/ LL_FS_RW=/ LL_SCOPED=s LL_FORCE_LOG=1 ./sandboxer kill 1
+
+This command generates two events, each identified with a unique serial
+number following a timestamp (``msg=audit(1729738800.268:30)``). The first
+event (serial ``30``) contains 4 records. The first record
+(``type=LANDLOCK_ACCESS``) shows an access denied by the domain `1a6fdc66f`.
+The cause of this denial is signal scopping restriction
+(``blockers=scope.signal``). The process that would have receive this signal
+is the init process (``opid=1 ocomm="systemd"``).
+
+The second record (``type=LANDLOCK_DOMAIN``) describes (``status=allocated``)
+domain `1a6fdc66f`. This domain was created by process ``286`` executing the
+``/root/sandboxer`` program launched by the root user.
+
+The third record (``type=SYSCALL``) describes the syscall, its provided
+arguments, its result (``success=no exit=-1``), and the process that called it.
+
+The fourth record (``type=PROCTITLE``) shows the command's name as an
+hexadecimal value. This can be translated with ``python -c
+'print(bytes.fromhex("6B696C6C0031"))'``.
+
+Finally, the last record (``type=LANDLOCK_DOMAIN``) is also the only one from
+the second event (serial ``31``). It is not tied to a direct user space action
+but an asynchronous one to free resources tied to a Landlock domain
+(``status=deallocated``). This can be useful to know that the following logs
+will not concern the domain ``1a6fdc66f`` anymore. This record also summarize
+the number of requests this domain denied (``denials=1``), whether they were
+logged or not.
+
+.. code-block::
+
+ type=LANDLOCK_ACCESS msg=audit(1729738800.268:30): domain=1a6fdc66f blockers=scope.signal opid=1 ocomm="systemd"
+ type=LANDLOCK_DOMAIN msg=audit(1729738800.268:30): domain=1a6fdc66f status=allocated mode=enforcing pid=286 uid=0 exe="/root/sandboxer" comm="sandboxer"
+ type=SYSCALL msg=audit(1729738800.268:30): arch=c000003e syscall=62 success=no exit=-1 [..] ppid=272 pid=286 auid=0 uid=0 gid=0 [...] comm="kill" [...]
+ type=PROCTITLE msg=audit(1729738800.268:30): proctitle=6B696C6C0031
+ type=LANDLOCK_DOMAIN msg=audit(1729738800.324:31): domain=1a6fdc66f status=deallocated denials=1
+
+Here is another example showcasing filesystem access control::
+
+ $ LL_FS_RO=/ LL_FS_RW=/tmp LL_FORCE_LOG=1 ./sandboxer sh -c "echo > /etc/passwd"
+
+The related audit logs contains 8 records from 3 different events (serials 33,
+34 and 35) created by the same domain `1a6fdc679`::
+
+ type=LANDLOCK_ACCESS msg=audit(1729738800.221:33): domain=1a6fdc679 blockers=fs.write_file path="/dev/tty" dev="devtmpfs" ino=9
+ type=LANDLOCK_DOMAIN msg=audit(1729738800.221:33): domain=1a6fdc679 status=allocated mode=enforcing pid=289 uid=0 exe="/root/sandboxer" comm="sandboxer"
+ type=SYSCALL msg=audit(1729738800.221:33): arch=c000003e syscall=257 success=no exit=-13 [...] ppid=272 pid=289 auid=0 uid=0 gid=0 [...] comm="sh" [...]
+ type=PROCTITLE msg=audit(1729738800.221:33): proctitle=7368002D63006563686F203E202F6574632F706173737764
+ type=LANDLOCK_ACCESS msg=audit(1729738800.221:34): domain=1a6fdc679 blockers=fs.write_file path="/etc/passwd" dev="vda2" ino=143821
+ type=SYSCALL msg=audit(1729738800.221:34): arch=c000003e syscall=257 success=no exit=-13 [...] ppid=272 pid=289 auid=0 uid=0 gid=0 [...] comm="sh" [...]
+ type=PROCTITLE msg=audit(1729738800.221:34): proctitle=7368002D63006563686F203E202F6574632F706173737764
+ type=LANDLOCK_DOMAIN msg=audit(1729738800.261:35): domain=1a6fdc679 status=deallocated denials=2
+
+
+Event filtering
+---------------
+
+If you get spammed with audit logs related to Landlock, this is either an
+attack attempt or a bug in the security policy. We can put in place some
+filters to limit noise with two complementary ways:
+
+- with sys_landlock_restrict_self()'s flags if we can fix the sandboxed
+ programs,
+- or with audit rules (see :manpage:`auditctl(8)`).
+
+Additional documentation
+========================
+
+* `Linux Audit Documentation`_
+* Documentation/userspace-api/landlock.rst
+* Documentation/security/landlock.rst
+* https://landlock.io
+
+.. Links
+.. _Linux Audit Documentation:
+ https://github.com/linux-audit/audit-documentation/wiki
diff --git a/Documentation/admin-guide/LSM/tomoyo.rst b/Documentation/admin-guide/LSM/tomoyo.rst
index 4bc9c2b4da6f..bdb2c2e2a1b2 100644
--- a/Documentation/admin-guide/LSM/tomoyo.rst
+++ b/Documentation/admin-guide/LSM/tomoyo.rst
@@ -9,8 +9,8 @@ TOMOYO is a name-based MAC extension (LSM module) for the Linux kernel.
LiveCD-based tutorials are available at
-http://tomoyo.sourceforge.jp/1.8/ubuntu12.04-live.html
-http://tomoyo.sourceforge.jp/1.8/centos6-live.html
+https://tomoyo.sourceforge.net/1.8/ubuntu12.04-live.html
+https://tomoyo.sourceforge.net/1.8/centos6-live.html
Though these tutorials use non-LSM version of TOMOYO, they are useful for you
to know what TOMOYO is.
@@ -21,45 +21,32 @@ How to enable TOMOYO?
Build the kernel with ``CONFIG_SECURITY_TOMOYO=y`` and pass ``security=tomoyo`` on
kernel's command line.
-Please see http://tomoyo.osdn.jp/2.5/ for details.
+Please see https://tomoyo.sourceforge.net/2.6/ for details.
Where is documentation?
=======================
User <-> Kernel interface documentation is available at
-https://tomoyo.osdn.jp/2.5/policy-specification/index.html .
+https://tomoyo.sourceforge.net/2.6/policy-specification/index.html .
Materials we prepared for seminars and symposiums are available at
-https://osdn.jp/projects/tomoyo/docs/?category_id=532&language_id=1 .
+https://sourceforge.net/projects/tomoyo/files/docs/ .
Below lists are chosen from three aspects.
What is TOMOYO?
TOMOYO Linux Overview
- https://osdn.jp/projects/tomoyo/docs/lca2009-takeda.pdf
+ https://sourceforge.net/projects/tomoyo/files/docs/lca2009-takeda.pdf
TOMOYO Linux: pragmatic and manageable security for Linux
- https://osdn.jp/projects/tomoyo/docs/freedomhectaipei-tomoyo.pdf
+ https://sourceforge.net/projects/tomoyo/files/docs/freedomhectaipei-tomoyo.pdf
TOMOYO Linux: A Practical Method to Understand and Protect Your Own Linux Box
- https://osdn.jp/projects/tomoyo/docs/PacSec2007-en-no-demo.pdf
+ https://sourceforge.net/projects/tomoyo/files/docs/PacSec2007-en-no-demo.pdf
What can TOMOYO do?
Deep inside TOMOYO Linux
- https://osdn.jp/projects/tomoyo/docs/lca2009-kumaneko.pdf
+ https://sourceforge.net/projects/tomoyo/files/docs/lca2009-kumaneko.pdf
The role of "pathname based access control" in security.
- https://osdn.jp/projects/tomoyo/docs/lfj2008-bof.pdf
+ https://sourceforge.net/projects/tomoyo/files/docs/lfj2008-bof.pdf
History of TOMOYO?
Realities of Mainlining
- https://osdn.jp/projects/tomoyo/docs/lfj2008.pdf
-
-What is future plan?
-====================
-
-We believe that inode based security and name based security are complementary
-and both should be used together. But unfortunately, so far, we cannot enable
-multiple LSM modules at the same time. We feel sorry that you have to give up
-SELinux/SMACK/AppArmor etc. when you want to use TOMOYO.
-
-We hope that LSM becomes stackable in future. Meanwhile, you can use non-LSM
-version of TOMOYO, available at http://tomoyo.osdn.jp/1.8/ .
-LSM version of TOMOYO is a subset of non-LSM version of TOMOYO. We are planning
-to port non-LSM version's functionalities to LSM versions.
+ https://sourceforge.net/projects/tomoyo/files/docs/lfj2008.pdf
diff --git a/Documentation/admin-guide/RAS/address-translation.rst b/Documentation/admin-guide/RAS/address-translation.rst
new file mode 100644
index 000000000000..f0ca17b43cd3
--- /dev/null
+++ b/Documentation/admin-guide/RAS/address-translation.rst
@@ -0,0 +1,24 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Address translation
+===================
+
+x86 AMD
+-------
+
+Zen-based AMD systems include a Data Fabric that manages the layout of
+physical memory. Devices attached to the Fabric, like memory controllers,
+I/O, etc., may not have a complete view of the system physical memory map.
+These devices may provide a "normalized", i.e. device physical, address
+when reporting memory errors. Normalized addresses must be translated to
+a system physical address for the kernel to action on the memory.
+
+AMD Address Translation Library (CONFIG_AMD_ATL) provides translation for
+this case.
+
+Glossary of acronyms used in address translation for Zen-based systems
+
+* CCM = Cache Coherent Moderator
+* COD = Cluster-on-Die
+* COH_ST = Coherent Station
+* DF = Data Fabric
diff --git a/Documentation/admin-guide/RAS/error-decoding.rst b/Documentation/admin-guide/RAS/error-decoding.rst
new file mode 100644
index 000000000000..26a72f3fe5de
--- /dev/null
+++ b/Documentation/admin-guide/RAS/error-decoding.rst
@@ -0,0 +1,21 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Error decoding
+==============
+
+x86
+---
+
+Error decoding on AMD systems should be done using the rasdaemon tool:
+https://github.com/mchehab/rasdaemon/
+
+While the daemon is running, it would automatically log and decode
+errors. If not, one can still decode such errors by supplying the
+hardware information from the error::
+
+ $ rasdaemon -p --status <STATUS> --ipid <IPID> --smca
+
+Also, the user can pass particular family and model to decode the error
+string::
+
+ $ rasdaemon -p --status <STATUS> --ipid <IPID> --smca --family <CPU Family> --model <CPU Model> --bank <BANK_NUM>
diff --git a/Documentation/admin-guide/RAS/index.rst b/Documentation/admin-guide/RAS/index.rst
new file mode 100644
index 000000000000..f4087040a7c0
--- /dev/null
+++ b/Documentation/admin-guide/RAS/index.rst
@@ -0,0 +1,7 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. toctree::
+ :maxdepth: 2
+
+ main
+ error-decoding
+ address-translation
diff --git a/Documentation/admin-guide/RAS/main.rst b/Documentation/admin-guide/RAS/main.rst
new file mode 100644
index 000000000000..5a45db32c49b
--- /dev/null
+++ b/Documentation/admin-guide/RAS/main.rst
@@ -0,0 +1,1087 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: <isonum.txt>
+
+==================================================
+Reliability, Availability and Serviceability (RAS)
+==================================================
+
+This documents different aspects of the RAS functionality present in the
+kernel.
+
+RAS concepts
+************
+
+Reliability, Availability and Serviceability (RAS) is a concept used on
+servers meant to measure their robustness.
+
+Reliability
+ is the probability that a system will produce correct outputs.
+
+ * Generally measured as Mean Time Between Failures (MTBF)
+ * Enhanced by features that help to avoid, detect and repair hardware faults
+
+Availability
+ is the probability that a system is operational at a given time
+
+ * Generally measured as a percentage of downtime per a period of time
+ * Often uses mechanisms to detect and correct hardware faults in
+ runtime;
+
+Serviceability (or maintainability)
+ is the simplicity and speed with which a system can be repaired or
+ maintained
+
+ * Generally measured on Mean Time Between Repair (MTBR)
+
+Improving RAS
+-------------
+
+In order to reduce systems downtime, a system should be capable of detecting
+hardware errors, and, when possible correcting them in runtime. It should
+also provide mechanisms to detect hardware degradation, in order to warn
+the system administrator to take the action of replacing a component before
+it causes data loss or system downtime.
+
+Among the monitoring measures, the most usual ones include:
+
+* CPU – detect errors at instruction execution and at L1/L2/L3 caches;
+* Memory – add error correction logic (ECC) to detect and correct errors;
+* I/O – add CRC checksums for transferred data;
+* Storage – RAID, journal file systems, checksums,
+ Self-Monitoring, Analysis and Reporting Technology (SMART).
+
+By monitoring the number of occurrences of error detections, it is possible
+to identify if the probability of hardware errors is increasing, and, on such
+case, do a preventive maintenance to replace a degraded component while
+those errors are correctable.
+
+Types of errors
+---------------
+
+Most mechanisms used on modern systems use technologies like Hamming
+Codes that allow error correction when the number of errors on a bit packet
+is below a threshold. If the number of errors is above, those mechanisms
+can indicate with a high degree of confidence that an error happened, but
+they can't correct.
+
+Also, sometimes an error occur on a component that it is not used. For
+example, a part of the memory that it is not currently allocated.
+
+That defines some categories of errors:
+
+* **Correctable Error (CE)** - the error detection mechanism detected and
+ corrected the error. Such errors are usually not fatal, although some
+ Kernel mechanisms allow the system administrator to consider them as fatal.
+
+* **Uncorrected Error (UE)** - the amount of errors happened above the error
+ correction threshold, and the system was unable to auto-correct.
+
+* **Fatal Error** - when an UE error happens on a critical component of the
+ system (for example, a piece of the Kernel got corrupted by an UE), the
+ only reliable way to avoid data corruption is to hang or reboot the machine.
+
+* **Non-fatal Error** - when an UE error happens on an unused component,
+ like a CPU in power down state or an unused memory bank, the system may
+ still run, eventually replacing the affected hardware by a hot spare,
+ if available.
+
+ Also, when an error happens on a userspace process, it is also possible to
+ kill such process and let userspace restart it.
+
+The mechanism for handling non-fatal errors is usually complex and may
+require the help of some userspace application, in order to apply the
+policy desired by the system administrator.
+
+Identifying a bad hardware component
+------------------------------------
+
+Just detecting a hardware flaw is usually not enough, as the system needs
+to pinpoint to the minimal replaceable unit (MRU) that should be exchanged
+to make the hardware reliable again.
+
+So, it requires not only error logging facilities, but also mechanisms that
+will translate the error message to the silkscreen or component label for
+the MRU.
+
+Typically, it is very complex for memory, as modern CPUs interlace memory
+from different memory modules, in order to provide a better performance. The
+DMI BIOS usually have a list of memory module labels, with can be obtained
+using the ``dmidecode`` tool. For example, on a desktop machine, it shows::
+
+ Memory Device
+ Total Width: 64 bits
+ Data Width: 64 bits
+ Size: 16384 MB
+ Form Factor: SODIMM
+ Set: None
+ Locator: ChannelA-DIMM0
+ Bank Locator: BANK 0
+ Type: DDR4
+ Type Detail: Synchronous
+ Speed: 2133 MHz
+ Rank: 2
+ Configured Clock Speed: 2133 MHz
+
+On the above example, a DDR4 SO-DIMM memory module is located at the
+system's memory labeled as "BANK 0", as given by the *bank locator* field.
+Please notice that, on such system, the *total width* is equal to the
+*data width*. It means that such memory module doesn't have error
+detection/correction mechanisms.
+
+Unfortunately, not all systems use the same field to specify the memory
+bank. On this example, from an older server, ``dmidecode`` shows::
+
+ Memory Device
+ Array Handle: 0x1000
+ Error Information Handle: Not Provided
+ Total Width: 72 bits
+ Data Width: 64 bits
+ Size: 8192 MB
+ Form Factor: DIMM
+ Set: 1
+ Locator: DIMM_A1
+ Bank Locator: Not Specified
+ Type: DDR3
+ Type Detail: Synchronous Registered (Buffered)
+ Speed: 1600 MHz
+ Rank: 2
+ Configured Clock Speed: 1600 MHz
+
+There, the DDR3 RDIMM memory module is located at the system's memory labeled
+as "DIMM_A1", as given by the *locator* field. Please notice that this
+memory module has 64 bits of *data width* and 72 bits of *total width*. So,
+it has 8 extra bits to be used by error detection and correction mechanisms.
+Such kind of memory is called Error-correcting code memory (ECC memory).
+
+To make things even worse, it is not uncommon that systems with different
+labels on their system's board to use exactly the same BIOS, meaning that
+the labels provided by the BIOS won't match the real ones.
+
+ECC memory
+----------
+
+As mentioned in the previous section, ECC memory has extra bits to be
+used for error correction. In the above example, a memory module has
+64 bits of *data width*, and 72 bits of *total width*. The extra 8
+bits which are used for the error detection and correction mechanisms
+are referred to as the *syndrome*\ [#f1]_\ [#f2]_.
+
+So, when the cpu requests the memory controller to write a word with
+*data width*, the memory controller calculates the *syndrome* in real time,
+using Hamming code, or some other error correction code, like SECDED+,
+producing a code with *total width* size. Such code is then written
+on the memory modules.
+
+At read, the *total width* bits code is converted back, using the same
+ECC code used on write, producing a word with *data width* and a *syndrome*.
+The word with *data width* is sent to the CPU, even when errors happen.
+
+The memory controller also looks at the *syndrome* in order to check if
+there was an error, and if the ECC code was able to fix such error.
+If the error was corrected, a Corrected Error (CE) happened. If not, an
+Uncorrected Error (UE) happened.
+
+The information about the CE/UE errors is stored on some special registers
+at the memory controller and can be accessed by reading such registers,
+either by BIOS, by some special CPUs or by Linux EDAC driver. On x86 64
+bit CPUs, such errors can also be retrieved via the Machine Check
+Architecture (MCA)\ [#f3]_.
+
+.. [#f1] Please notice that several memory controllers allow operation on a
+ mode called "Lock-Step", where it groups two memory modules together,
+ doing 128-bit reads/writes. That gives 16 bits for error correction, with
+ significantly improves the error correction mechanism, at the expense
+ that, when an error happens, there's no way to know what memory module is
+ to blame. So, it has to blame both memory modules.
+
+.. [#f2] Some memory controllers also allow using memory in mirror mode.
+ On such mode, the same data is written to two memory modules. At read,
+ the system checks both memory modules, in order to check if both provide
+ identical data. On such configuration, when an error happens, there's no
+ way to know what memory module is to blame. So, it has to blame both
+ memory modules (or 4 memory modules, if the system is also on Lock-step
+ mode).
+
+.. [#f3] For more details about the Machine Check Architecture (MCA),
+ please read Documentation/arch/x86/x86_64/machinecheck.rst at the Kernel tree.
+
+EDAC - Error Detection And Correction
+*************************************
+
+.. note::
+
+ "bluesmoke" was the name for this device driver subsystem when it
+ was "out-of-tree" and maintained at http://bluesmoke.sourceforge.net.
+ That site is mostly archaic now and can be used only for historical
+ purposes.
+
+ When the subsystem was pushed upstream for the first time, on
+ Kernel 2.6.16, it was renamed to ``EDAC``.
+
+Purpose
+-------
+
+The ``edac`` kernel module's goal is to detect and report hardware errors
+that occur within the computer system running under linux.
+
+Memory
+------
+
+Memory Correctable Errors (CE) and Uncorrectable Errors (UE) are the
+primary errors being harvested. These types of errors are harvested by
+the ``edac_mc`` device.
+
+Detecting CE events, then harvesting those events and reporting them,
+**can** but must not necessarily be a predictor of future UE events. With
+CE events only, the system can and will continue to operate as no data
+has been damaged yet.
+
+However, preventive maintenance and proactive part replacement of memory
+modules exhibiting CEs can reduce the likelihood of the dreaded UE events
+and system panics.
+
+Other hardware elements
+-----------------------
+
+A new feature for EDAC, the ``edac_device`` class of device, was added in
+the 2.6.23 version of the kernel.
+
+This new device type allows for non-memory type of ECC hardware detectors
+to have their states harvested and presented to userspace via the sysfs
+interface.
+
+Some architectures have ECC detectors for L1, L2 and L3 caches,
+along with DMA engines, fabric switches, main data path switches,
+interconnections, and various other hardware data paths. If the hardware
+reports it, then an edac_device device probably can be constructed to
+harvest and present that to userspace.
+
+
+PCI bus scanning
+----------------
+
+In addition, PCI devices are scanned for PCI Bus Parity and SERR Errors
+in order to determine if errors are occurring during data transfers.
+
+The presence of PCI Parity errors must be examined with a grain of salt.
+There are several add-in adapters that do **not** follow the PCI specification
+with regards to Parity generation and reporting. The specification says
+the vendor should tie the parity status bits to 0 if they do not intend
+to generate parity. Some vendors do not do this, and thus the parity bit
+can "float" giving false positives.
+
+There is a PCI device attribute located in sysfs that is checked by
+the EDAC PCI scanning code. If that attribute is set, PCI parity/error
+scanning is skipped for that device. The attribute is::
+
+ broken_parity_status
+
+and is located in ``/sys/devices/pci<XXX>/0000:XX:YY.Z`` directories for
+PCI devices.
+
+
+Versioning
+----------
+
+EDAC is composed of a "core" module (``edac_core.ko``) and several Memory
+Controller (MC) driver modules. On a given system, the CORE is loaded
+and one MC driver will be loaded. Both the CORE and the MC driver (or
+``edac_device`` driver) have individual versions that reflect current
+release level of their respective modules.
+
+Thus, to "report" on what version a system is running, one must report
+both the CORE's and the MC driver's versions.
+
+
+Loading
+-------
+
+If ``edac`` was statically linked with the kernel then no loading
+is necessary. If ``edac`` was built as modules then simply modprobe
+the ``edac`` pieces that you need. You should be able to modprobe
+hardware-specific modules and have the dependencies load the necessary
+core modules.
+
+Example::
+
+ $ modprobe amd76x_edac
+
+loads both the ``amd76x_edac.ko`` memory controller module and the
+``edac_mc.ko`` core module.
+
+
+Sysfs interface
+---------------
+
+EDAC presents a ``sysfs`` interface for control and reporting purposes. It
+lives in the /sys/devices/system/edac directory.
+
+Within this directory there currently reside 2 components:
+
+ ======= ==============================
+ mc memory controller(s) system
+ pci PCI control and status system
+ ======= ==============================
+
+
+
+Memory Controller (mc) Model
+----------------------------
+
+Each ``mc`` device controls a set of memory modules [#f4]_. These modules
+are laid out in a Chip-Select Row (``csrowX``) and Channel table (``chX``).
+There can be multiple csrows and multiple channels.
+
+.. [#f4] Nowadays, the term DIMM (Dual In-line Memory Module) is widely
+ used to refer to a memory module, although there are other memory
+ packaging alternatives, like SO-DIMM, SIMM, etc. The UEFI
+ specification (Version 2.7) defines a memory module in the Common
+ Platform Error Record (CPER) section to be an SMBIOS Memory Device
+ (Type 17). Along this document, and inside the EDAC subsystem, the term
+ "dimm" is used for all memory modules, even when they use a
+ different kind of packaging.
+
+Memory controllers allow for several csrows, with 8 csrows being a
+typical value. Yet, the actual number of csrows depends on the layout of
+a given motherboard, memory controller and memory module characteristics.
+
+Dual channels allow for dual data length (e. g. 128 bits, on 64 bit systems)
+data transfers to/from the CPU from/to memory. Some newer chipsets allow
+for more than 2 channels, like Fully Buffered DIMMs (FB-DIMMs) memory
+controllers. The following example will assume 2 channels:
+
+ +------------+-----------------------+
+ | CS Rows | Channels |
+ +------------+-----------+-----------+
+ | | ``ch0`` | ``ch1`` |
+ +============+===========+===========+
+ | |**DIMM_A0**|**DIMM_B0**|
+ +------------+-----------+-----------+
+ | ``csrow0`` | rank0 | rank0 |
+ +------------+-----------+-----------+
+ | ``csrow1`` | rank1 | rank1 |
+ +------------+-----------+-----------+
+ | |**DIMM_A1**|**DIMM_B1**|
+ +------------+-----------+-----------+
+ | ``csrow2`` | rank0 | rank0 |
+ +------------+-----------+-----------+
+ | ``csrow3`` | rank1 | rank1 |
+ +------------+-----------+-----------+
+
+In the above example, there are 4 physical slots on the motherboard
+for memory DIMMs:
+
+ +---------+---------+
+ | DIMM_A0 | DIMM_B0 |
+ +---------+---------+
+ | DIMM_A1 | DIMM_B1 |
+ +---------+---------+
+
+Labels for these slots are usually silk-screened on the motherboard.
+Slots labeled ``A`` are channel 0 in this example. Slots labeled ``B`` are
+channel 1. Notice that there are two csrows possible on a physical DIMM.
+These csrows are allocated their csrow assignment based on the slot into
+which the memory DIMM is placed. Thus, when 1 DIMM is placed in each
+Channel, the csrows cross both DIMMs.
+
+Memory DIMMs come single or dual "ranked". A rank is a populated csrow.
+In the example above 2 dual ranked DIMMs are similarly placed. Thus,
+both csrow0 and csrow1 are populated. On the other hand, when 2 single
+ranked DIMMs are placed in slots DIMM_A0 and DIMM_B0, then they will
+have just one csrow (csrow0) and csrow1 will be empty. The pattern
+repeats itself for csrow2 and csrow3. Also note that some memory
+controllers don't have any logic to identify the memory module, see
+``rankX`` directories below.
+
+The representation of the above is reflected in the directory
+tree in EDAC's sysfs interface. Starting in directory
+``/sys/devices/system/edac/mc``, each memory controller will be
+represented by its own ``mcX`` directory, where ``X`` is the
+index of the MC::
+
+ ..../edac/mc/
+ |
+ |->mc0
+ |->mc1
+ |->mc2
+ ....
+
+Within each of the ``mcX`` directory are several EDAC control and
+attribute files.
+
+``mcX`` directories
+-------------------
+
+In ``mcX`` directories are EDAC control and attribute files for
+this ``X`` instance of the memory controllers.
+
+For a description of the sysfs API, please see:
+
+ Documentation/ABI/testing/sysfs-devices-edac
+
+
+``dimmX`` or ``rankX`` directories
+----------------------------------
+
+The recommended way to use the EDAC subsystem is to look at the information
+provided by the ``dimmX`` or ``rankX`` directories [#f5]_.
+
+A typical EDAC system has the following structure under
+``/sys/devices/system/edac/``\ [#f6]_::
+
+ /sys/devices/system/edac/
+ ├── mc
+ │   ├── mc0
+ │   │   ├── ce_count
+ │   │   ├── ce_noinfo_count
+ │   │   ├── dimm0
+ │   │   │   ├── dimm_ce_count
+ │   │   │   ├── dimm_dev_type
+ │   │   │   ├── dimm_edac_mode
+ │   │   │   ├── dimm_label
+ │   │   │   ├── dimm_location
+ │   │   │   ├── dimm_mem_type
+ │   │   │   ├── dimm_ue_count
+ │   │   │   ├── size
+ │   │   │   └── uevent
+ │   │   ├── max_location
+ │   │   ├── mc_name
+ │   │   ├── reset_counters
+ │   │   ├── seconds_since_reset
+ │   │   ├── size_mb
+ │   │   ├── ue_count
+ │   │   ├── ue_noinfo_count
+ │   │   └── uevent
+ │   ├── mc1
+ │   │   ├── ce_count
+ │   │   ├── ce_noinfo_count
+ │   │   ├── dimm0
+ │   │   │   ├── dimm_ce_count
+ │   │   │   ├── dimm_dev_type
+ │   │   │   ├── dimm_edac_mode
+ │   │   │   ├── dimm_label
+ │   │   │   ├── dimm_location
+ │   │   │   ├── dimm_mem_type
+ │   │   │   ├── dimm_ue_count
+ │   │   │   ├── size
+ │   │   │   └── uevent
+ │   │   ├── max_location
+ │   │   ├── mc_name
+ │   │   ├── reset_counters
+ │   │   ├── seconds_since_reset
+ │   │   ├── size_mb
+ │   │   ├── ue_count
+ │   │   ├── ue_noinfo_count
+ │   │   └── uevent
+ │   └── uevent
+ └── uevent
+
+In the ``dimmX`` directories are EDAC control and attribute files for
+this ``X`` memory module:
+
+- ``size`` - Total memory managed by this csrow attribute file
+
+ This attribute file displays, in count of megabytes, the memory
+ that this csrow contains.
+
+- ``dimm_ue_count`` - Uncorrectable Errors count attribute file
+
+ This attribute file displays the total count of uncorrectable
+ errors that have occurred on this DIMM. If panic_on_ue is set
+ this counter will not have a chance to increment, since EDAC
+ will panic the system.
+
+- ``dimm_ce_count`` - Correctable Errors count attribute file
+
+ This attribute file displays the total count of correctable
+ errors that have occurred on this DIMM. This count is very
+ important to examine. CEs provide early indications that a
+ DIMM is beginning to fail. This count field should be
+ monitored for non-zero values and report such information
+ to the system administrator.
+
+- ``dimm_dev_type`` - Device type attribute file
+
+ This attribute file will display what type of DRAM device is
+ being utilized on this DIMM.
+ Examples:
+
+ - x1
+ - x2
+ - x4
+ - x8
+
+- ``dimm_edac_mode`` - EDAC Mode of operation attribute file
+
+ This attribute file will display what type of Error detection
+ and correction is being utilized.
+
+- ``dimm_label`` - memory module label control file
+
+ This control file allows this DIMM to have a label assigned
+ to it. With this label in the module, when errors occur
+ the output can provide the DIMM label in the system log.
+ This becomes vital for panic events to isolate the
+ cause of the UE event.
+
+ DIMM Labels must be assigned after booting, with information
+ that correctly identifies the physical slot with its
+ silk screen label. This information is currently very
+ motherboard specific and determination of this information
+ must occur in userland at this time.
+
+- ``dimm_location`` - location of the memory module
+
+ The location can have up to 3 levels, and describe how the
+ memory controller identifies the location of a memory module.
+ Depending on the type of memory and memory controller, it
+ can be:
+
+ - *csrow* and *channel* - used when the memory controller
+ doesn't identify a single DIMM - e. g. in ``rankX`` dir;
+ - *branch*, *channel*, *slot* - typically used on FB-DIMM memory
+ controllers;
+ - *channel*, *slot* - used on Nehalem and newer Intel drivers.
+
+- ``dimm_mem_type`` - Memory Type attribute file
+
+ This attribute file will display what type of memory is currently
+ on this csrow. Normally, either buffered or unbuffered memory.
+ Examples:
+
+ - Registered-DDR
+ - Unbuffered-DDR
+
+.. [#f5] On some systems, the memory controller doesn't have any logic
+ to identify the memory module. On such systems, the directory is called ``rankX``.
+ On modern Intel memory controllers, the memory controller identifies the
+ memory modules directly. On such systems, the directory is called ``dimmX``.
+
+.. [#f6] There are also some ``power`` directories and ``subsystem``
+ symlinks inside the sysfs mapping that are automatically created by
+ the sysfs subsystem. Currently, they serve no purpose.
+
+
+System Logging
+--------------
+
+If logging for UEs and CEs is enabled, then system logs will contain
+information indicating that errors have been detected::
+
+ EDAC MC0: CE page 0x283, offset 0xce0, grain 8, syndrome 0x6ec3, row 0, channel 1 "DIMM_B1": amd76x_edac
+ EDAC MC0: CE page 0x1e5, offset 0xfb0, grain 8, syndrome 0xb741, row 0, channel 1 "DIMM_B1": amd76x_edac
+
+
+The structure of the message is:
+
+ +---------------------------------------+-------------+
+ | Content | Example |
+ +=======================================+=============+
+ | The memory controller | MC0 |
+ +---------------------------------------+-------------+
+ | Error type | CE |
+ +---------------------------------------+-------------+
+ | Memory page | 0x283 |
+ +---------------------------------------+-------------+
+ | Offset in the page | 0xce0 |
+ +---------------------------------------+-------------+
+ | The byte granularity | grain 8 |
+ | or resolution of the error | |
+ +---------------------------------------+-------------+
+ | The error syndrome | 0xb741 |
+ +---------------------------------------+-------------+
+ | Memory row | row 0 |
+ +---------------------------------------+-------------+
+ | Memory channel | channel 1 |
+ +---------------------------------------+-------------+
+ | DIMM label, if set prior | DIMM B1 |
+ +---------------------------------------+-------------+
+ | And then an optional, driver-specific | |
+ | message that may have additional | |
+ | information. | |
+ +---------------------------------------+-------------+
+
+Both UEs and CEs with no info will lack all but memory controller, error
+type, a notice of "no info" and then an optional, driver-specific error
+message.
+
+
+PCI Bus Parity Detection
+------------------------
+
+On Header Type 00 devices, the primary status is looked at for any
+parity error regardless of whether parity is enabled on the device or
+not. (The spec indicates parity is generated in some cases). On Header
+Type 01 bridges, the secondary status register is also looked at to see
+if parity occurred on the bus on the other side of the bridge.
+
+
+Sysfs configuration
+-------------------
+
+Under ``/sys/devices/system/edac/pci`` are control and attribute files as
+follows:
+
+
+- ``check_pci_parity`` - Enable/Disable PCI Parity checking control file
+
+ This control file enables or disables the PCI Bus Parity scanning
+ operation. Writing a 1 to this file enables the scanning. Writing
+ a 0 to this file disables the scanning.
+
+ Enable::
+
+ echo "1" >/sys/devices/system/edac/pci/check_pci_parity
+
+ Disable::
+
+ echo "0" >/sys/devices/system/edac/pci/check_pci_parity
+
+
+- ``pci_parity_count`` - Parity Count
+
+ This attribute file will display the number of parity errors that
+ have been detected.
+
+
+Module parameters
+-----------------
+
+- ``edac_mc_panic_on_ue`` - Panic on UE control file
+
+ An uncorrectable error will cause a machine panic. This is usually
+ desirable. It is a bad idea to continue when an uncorrectable error
+ occurs - it is indeterminate what was uncorrected and the operating
+ system context might be so mangled that continuing will lead to further
+ corruption. If the kernel has MCE configured, then EDAC will never
+ notice the UE.
+
+ LOAD TIME::
+
+ module/kernel parameter: edac_mc_panic_on_ue=[0|1]
+
+ RUN TIME::
+
+ echo "1" > /sys/module/edac_core/parameters/edac_mc_panic_on_ue
+
+
+- ``edac_mc_log_ue`` - Log UE control file
+
+
+ Generate kernel messages describing uncorrectable errors. These errors
+ are reported through the system message log system. UE statistics
+ will be accumulated even when UE logging is disabled.
+
+ LOAD TIME::
+
+ module/kernel parameter: edac_mc_log_ue=[0|1]
+
+ RUN TIME::
+
+ echo "1" > /sys/module/edac_core/parameters/edac_mc_log_ue
+
+
+- ``edac_mc_log_ce`` - Log CE control file
+
+
+ Generate kernel messages describing correctable errors. These
+ errors are reported through the system message log system.
+ CE statistics will be accumulated even when CE logging is disabled.
+
+ LOAD TIME::
+
+ module/kernel parameter: edac_mc_log_ce=[0|1]
+
+ RUN TIME::
+
+ echo "1" > /sys/module/edac_core/parameters/edac_mc_log_ce
+
+
+- ``edac_mc_poll_msec`` - Polling period control file
+
+
+ The time period, in milliseconds, for polling for error information.
+ Too small a value wastes resources. Too large a value might delay
+ necessary handling of errors and might loose valuable information for
+ locating the error. 1000 milliseconds (once each second) is the current
+ default. Systems which require all the bandwidth they can get, may
+ increase this.
+
+ LOAD TIME::
+
+ module/kernel parameter: edac_mc_poll_msec=[0|1]
+
+ RUN TIME::
+
+ echo "1000" > /sys/module/edac_core/parameters/edac_mc_poll_msec
+
+
+- ``panic_on_pci_parity`` - Panic on PCI PARITY Error
+
+
+ This control file enables or disables panicking when a parity
+ error has been detected.
+
+
+ module/kernel parameter::
+
+ edac_panic_on_pci_pe=[0|1]
+
+ Enable::
+
+ echo "1" > /sys/module/edac_core/parameters/edac_panic_on_pci_pe
+
+ Disable::
+
+ echo "0" > /sys/module/edac_core/parameters/edac_panic_on_pci_pe
+
+
+
+EDAC device type
+----------------
+
+In the header file, edac_pci.h, there is a series of edac_device structures
+and APIs for the EDAC_DEVICE.
+
+User space access to an edac_device is through the sysfs interface.
+
+At the location ``/sys/devices/system/edac`` (sysfs) new edac_device devices
+will appear.
+
+There is a three level tree beneath the above ``edac`` directory. For example,
+the ``test_device_edac`` device (found at the http://bluesmoke.sourceforget.net
+website) installs itself as::
+
+ /sys/devices/system/edac/test-instance
+
+in this directory are various controls, a symlink and one or more ``instance``
+directories.
+
+The standard default controls are:
+
+ ============== =======================================================
+ log_ce boolean to log CE events
+ log_ue boolean to log UE events
+ panic_on_ue boolean to ``panic`` the system if an UE is encountered
+ (default off, can be set true via startup script)
+ poll_msec time period between POLL cycles for events
+ ============== =======================================================
+
+The test_device_edac device adds at least one of its own custom control:
+
+ ============== ==================================================
+ test_bits which in the current test driver does nothing but
+ show how it is installed. A ported driver can
+ add one or more such controls and/or attributes
+ for specific uses.
+ One out-of-tree driver uses controls here to allow
+ for ERROR INJECTION operations to hardware
+ injection registers
+ ============== ==================================================
+
+The symlink points to the 'struct dev' that is registered for this edac_device.
+
+Instances
+---------
+
+One or more instance directories are present. For the ``test_device_edac``
+case:
+
+ +----------------+
+ | test-instance0 |
+ +----------------+
+
+
+In this directory there are two default counter attributes, which are totals of
+counter in deeper subdirectories.
+
+ ============== ====================================
+ ce_count total of CE events of subdirectories
+ ue_count total of UE events of subdirectories
+ ============== ====================================
+
+Blocks
+------
+
+At the lowest directory level is the ``block`` directory. There can be 0, 1
+or more blocks specified in each instance:
+
+ +-------------+
+ | test-block0 |
+ +-------------+
+
+In this directory the default attributes are:
+
+ ============== ================================================
+ ce_count which is counter of CE events for this ``block``
+ of hardware being monitored
+ ue_count which is counter of UE events for this ``block``
+ of hardware being monitored
+ ============== ================================================
+
+
+The ``test_device_edac`` device adds 4 attributes and 1 control:
+
+ ================== ====================================================
+ test-block-bits-0 for every POLL cycle this counter
+ is incremented
+ test-block-bits-1 every 10 cycles, this counter is bumped once,
+ and test-block-bits-0 is set to 0
+ test-block-bits-2 every 100 cycles, this counter is bumped once,
+ and test-block-bits-1 is set to 0
+ test-block-bits-3 every 1000 cycles, this counter is bumped once,
+ and test-block-bits-2 is set to 0
+ ================== ====================================================
+
+
+ ================== ====================================================
+ reset-counters writing ANY thing to this control will
+ reset all the above counters.
+ ================== ====================================================
+
+
+Use of the ``test_device_edac`` driver should enable any others to create their own
+unique drivers for their hardware systems.
+
+The ``test_device_edac`` sample driver is located at the
+http://bluesmoke.sourceforge.net project site for EDAC.
+
+
+Usage of EDAC APIs on Nehalem and newer Intel CPUs
+--------------------------------------------------
+
+On older Intel architectures, the memory controller was part of the North
+Bridge chipset. Nehalem, Sandy Bridge, Ivy Bridge, Haswell, Sky Lake and
+newer Intel architectures integrated an enhanced version of the memory
+controller (MC) inside the CPUs.
+
+This chapter will cover the differences of the enhanced memory controllers
+found on newer Intel CPUs, such as ``i7core_edac``, ``sb_edac`` and
+``sbx_edac`` drivers.
+
+.. note::
+
+ The Xeon E7 processor families use a separate chip for the memory
+ controller, called Intel Scalable Memory Buffer. This section doesn't
+ apply for such families.
+
+1) There is one Memory Controller per Quick Patch Interconnect
+ (QPI). At the driver, the term "socket" means one QPI. This is
+ associated with a physical CPU socket.
+
+ Each MC have 3 physical read channels, 3 physical write channels and
+ 3 logic channels. The driver currently sees it as just 3 channels.
+ Each channel can have up to 3 DIMMs.
+
+ The minimum known unity is DIMMs. There are no information about csrows.
+ As EDAC API maps the minimum unity is csrows, the driver sequentially
+ maps channel/DIMM into different csrows.
+
+ For example, supposing the following layout::
+
+ Ch0 phy rd0, wr0 (0x063f4031): 2 ranks, UDIMMs
+ dimm 0 1024 Mb offset: 0, bank: 8, rank: 1, row: 0x4000, col: 0x400
+ dimm 1 1024 Mb offset: 4, bank: 8, rank: 1, row: 0x4000, col: 0x400
+ Ch1 phy rd1, wr1 (0x063f4031): 2 ranks, UDIMMs
+ dimm 0 1024 Mb offset: 0, bank: 8, rank: 1, row: 0x4000, col: 0x400
+ Ch2 phy rd3, wr3 (0x063f4031): 2 ranks, UDIMMs
+ dimm 0 1024 Mb offset: 0, bank: 8, rank: 1, row: 0x4000, col: 0x400
+
+ The driver will map it as::
+
+ csrow0: channel 0, dimm0
+ csrow1: channel 0, dimm1
+ csrow2: channel 1, dimm0
+ csrow3: channel 2, dimm0
+
+ exports one DIMM per csrow.
+
+ Each QPI is exported as a different memory controller.
+
+2) The MC has the ability to inject errors to test drivers. The drivers
+ implement this functionality via some error injection nodes:
+
+ For injecting a memory error, there are some sysfs nodes, under
+ ``/sys/devices/system/edac/mc/mc?/``:
+
+ - ``inject_addrmatch/*``:
+ Controls the error injection mask register. It is possible to specify
+ several characteristics of the address to match an error code::
+
+ dimm = the affected dimm. Numbers are relative to a channel;
+ rank = the memory rank;
+ channel = the channel that will generate an error;
+ bank = the affected bank;
+ page = the page address;
+ column (or col) = the address column.
+
+ each of the above values can be set to "any" to match any valid value.
+
+ At driver init, all values are set to any.
+
+ For example, to generate an error at rank 1 of dimm 2, for any channel,
+ any bank, any page, any column::
+
+ echo 2 >/sys/devices/system/edac/mc/mc0/inject_addrmatch/dimm
+ echo 1 >/sys/devices/system/edac/mc/mc0/inject_addrmatch/rank
+
+ To return to the default behaviour of matching any, you can do::
+
+ echo any >/sys/devices/system/edac/mc/mc0/inject_addrmatch/dimm
+ echo any >/sys/devices/system/edac/mc/mc0/inject_addrmatch/rank
+
+ - ``inject_eccmask``:
+ specifies what bits will have troubles,
+
+ - ``inject_section``:
+ specifies what ECC cache section will get the error::
+
+ 3 for both
+ 2 for the highest
+ 1 for the lowest
+
+ - ``inject_type``:
+ specifies the type of error, being a combination of the following bits::
+
+ bit 0 - repeat
+ bit 1 - ecc
+ bit 2 - parity
+
+ - ``inject_enable``:
+ starts the error generation when something different than 0 is written.
+
+ All inject vars can be read. root permission is needed for write.
+
+ Datasheet states that the error will only be generated after a write on an
+ address that matches inject_addrmatch. It seems, however, that reading will
+ also produce an error.
+
+ For example, the following code will generate an error for any write access
+ at socket 0, on any DIMM/address on channel 2::
+
+ echo 2 >/sys/devices/system/edac/mc/mc0/inject_addrmatch/channel
+ echo 2 >/sys/devices/system/edac/mc/mc0/inject_type
+ echo 64 >/sys/devices/system/edac/mc/mc0/inject_eccmask
+ echo 3 >/sys/devices/system/edac/mc/mc0/inject_section
+ echo 1 >/sys/devices/system/edac/mc/mc0/inject_enable
+ dd if=/dev/mem of=/dev/null seek=16k bs=4k count=1 >& /dev/null
+
+ For socket 1, it is needed to replace "mc0" by "mc1" at the above
+ commands.
+
+ The generated error message will look like::
+
+ EDAC MC0: UE row 0, channel-a= 0 channel-b= 0 labels "-": NON_FATAL (addr = 0x0075b980, socket=0, Dimm=0, Channel=2, syndrome=0x00000040, count=1, Err=8c0000400001009f:4000080482 (read error: read ECC error))
+
+3) Corrected Error memory register counters
+
+ Those newer MCs have some registers to count memory errors. The driver
+ uses those registers to report Corrected Errors on devices with Registered
+ DIMMs.
+
+ However, those counters don't work with Unregistered DIMM. As the chipset
+ offers some counters that also work with UDIMMs (but with a worse level of
+ granularity than the default ones), the driver exposes those registers for
+ UDIMM memories.
+
+ They can be read by looking at the contents of ``all_channel_counts/``::
+
+ $ for i in /sys/devices/system/edac/mc/mc0/all_channel_counts/*; do echo $i; cat $i; done
+ /sys/devices/system/edac/mc/mc0/all_channel_counts/udimm0
+ 0
+ /sys/devices/system/edac/mc/mc0/all_channel_counts/udimm1
+ 0
+ /sys/devices/system/edac/mc/mc0/all_channel_counts/udimm2
+ 0
+
+ What happens here is that errors on different csrows, but at the same
+ dimm number will increment the same counter.
+ So, in this memory mapping::
+
+ csrow0: channel 0, dimm0
+ csrow1: channel 0, dimm1
+ csrow2: channel 1, dimm0
+ csrow3: channel 2, dimm0
+
+ The hardware will increment udimm0 for an error at the first dimm at either
+ csrow0, csrow2 or csrow3;
+
+ The hardware will increment udimm1 for an error at the second dimm at either
+ csrow0, csrow2 or csrow3;
+
+ The hardware will increment udimm2 for an error at the third dimm at either
+ csrow0, csrow2 or csrow3;
+
+4) Standard error counters
+
+ The standard error counters are generated when an mcelog error is received
+ by the driver. Since, with UDIMM, this is counted by software, it is
+ possible that some errors could be lost. With RDIMM's, they display the
+ contents of the registers
+
+Reference documents used on ``amd64_edac``
+------------------------------------------
+
+``amd64_edac`` module is based on the following documents
+(available from http://support.amd.com/en-us/search/tech-docs):
+
+1. :Title: BIOS and Kernel Developer's Guide for AMD Athlon 64 and AMD
+ Opteron Processors
+ :AMD publication #: 26094
+ :Revision: 3.26
+ :Link: http://support.amd.com/TechDocs/26094.PDF
+
+2. :Title: BIOS and Kernel Developer's Guide for AMD NPT Family 0Fh
+ Processors
+ :AMD publication #: 32559
+ :Revision: 3.00
+ :Issue Date: May 2006
+ :Link: http://support.amd.com/TechDocs/32559.pdf
+
+3. :Title: BIOS and Kernel Developer's Guide (BKDG) For AMD Family 10h
+ Processors
+ :AMD publication #: 31116
+ :Revision: 3.00
+ :Issue Date: September 07, 2007
+ :Link: http://support.amd.com/TechDocs/31116.pdf
+
+4. :Title: BIOS and Kernel Developer's Guide (BKDG) for AMD Family 15h
+ Models 30h-3Fh Processors
+ :AMD publication #: 49125
+ :Revision: 3.06
+ :Issue Date: 2/12/2015 (latest release)
+ :Link: http://support.amd.com/TechDocs/49125_15h_Models_30h-3Fh_BKDG.pdf
+
+5. :Title: BIOS and Kernel Developer's Guide (BKDG) for AMD Family 15h
+ Models 60h-6Fh Processors
+ :AMD publication #: 50742
+ :Revision: 3.01
+ :Issue Date: 7/23/2015 (latest release)
+ :Link: http://support.amd.com/TechDocs/50742_15h_Models_60h-6Fh_BKDG.pdf
+
+6. :Title: BIOS and Kernel Developer's Guide (BKDG) for AMD Family 16h
+ Models 00h-0Fh Processors
+ :AMD publication #: 48751
+ :Revision: 3.03
+ :Issue Date: 2/23/2015 (latest release)
+ :Link: http://support.amd.com/TechDocs/48751_16h_bkdg.pdf
+
+Credits
+=======
+
+* Written by Doug Thompson <dougthompson@xmission.com>
+
+ - 7 Dec 2005
+ - 17 Jul 2007 Updated
+
+* |copy| Mauro Carvalho Chehab
+
+ - 05 Aug 2009 Nehalem interface
+ - 26 Oct 2016 Converted to ReST and cleanups at the Nehalem section
+
+* EDAC authors/maintainers:
+
+ - Doug Thompson, Dave Jiang, Dave Peterson et al,
+ - Mauro Carvalho Chehab
+ - Borislav Petkov
+ - original author: Thayne Harbaugh
diff --git a/Documentation/admin-guide/README.rst b/Documentation/admin-guide/README.rst
index 9a969c0157f1..05301f03b717 100644
--- a/Documentation/admin-guide/README.rst
+++ b/Documentation/admin-guide/README.rst
@@ -165,7 +165,7 @@ Configuring the kernel
"make xconfig" Qt based configuration tool.
- "make gconfig" GTK+ based configuration tool.
+ "make gconfig" GTK based configuration tool.
"make oldconfig" Default all questions based on the contents of
your existing ./.config file and asking about
@@ -176,7 +176,7 @@ Configuring the kernel
values without prompting.
"make defconfig" Create a ./.config file by using the default
- symbol values from either arch/$ARCH/defconfig
+ symbol values from either arch/$ARCH/configs/defconfig
or arch/$ARCH/configs/${PLATFORM}_defconfig,
depending on the architecture.
@@ -259,12 +259,14 @@ Configuring the kernel
Compiling the kernel
--------------------
- - Make sure you have at least gcc 5.1 available.
+ - Make sure you have at least gcc 8.1 available.
For more information, refer to :ref:`Documentation/process/changes.rst <changes>`.
- - Do a ``make`` to create a compressed kernel image. It is also
- possible to do ``make install`` if you have lilo installed to suit the
- kernel makefiles, but you may want to check your particular lilo setup first.
+ - Do a ``make`` to create a compressed kernel image. It is also possible to do
+ ``make install`` if you have lilo installed or if your distribution has an
+ install script recognised by the kernel's installer. Most popular
+ distributions will have a recognized install script. You may want to
+ check your distribution's setup first.
To do the actual install, you have to be root, but none of the normal
build should require that. Don't take the name of root in vain.
@@ -301,32 +303,51 @@ Compiling the kernel
image (e.g. .../linux/arch/x86/boot/bzImage after compilation)
to the place where your regular bootable kernel is found.
- - Booting a kernel directly from a floppy without the assistance of a
- bootloader such as LILO, is no longer supported.
-
- If you boot Linux from the hard drive, chances are you use LILO, which
- uses the kernel image as specified in the file /etc/lilo.conf. The
- kernel image file is usually /vmlinuz, /boot/vmlinuz, /bzImage or
- /boot/bzImage. To use the new kernel, save a copy of the old image
- and copy the new image over the old one. Then, you MUST RERUN LILO
- to update the loading map! If you don't, you won't be able to boot
- the new kernel image.
-
- Reinstalling LILO is usually a matter of running /sbin/lilo.
- You may wish to edit /etc/lilo.conf to specify an entry for your
- old kernel image (say, /vmlinux.old) in case the new one does not
- work. See the LILO docs for more information.
-
- After reinstalling LILO, you should be all set. Shutdown the system,
+ - Booting a kernel directly from a storage device without the assistance
+ of a bootloader such as LILO or GRUB, is no longer supported in BIOS
+ (non-EFI systems). On UEFI/EFI systems, however, you can use EFISTUB
+ which allows the motherboard to boot directly to the kernel.
+ On modern workstations and desktops, it's generally recommended to use a
+ bootloader as difficulties can arise with multiple kernels and secure boot.
+ For more details on EFISTUB,
+ see "Documentation/admin-guide/efi-stub.rst".
+
+ - It's important to note that as of 2016 LILO (LInux LOader) is no longer in
+ active development, though as it was extremely popular, it often comes up
+ in documentation. Popular alternatives include GRUB2, rEFInd, Syslinux,
+ systemd-boot, or EFISTUB. For various reasons, it's not recommended to use
+ software that's no longer in active development.
+
+ - Chances are your distribution includes an install script and running
+ ``make install`` will be all that's needed. Should that not be the case
+ you'll have to identify your bootloader and reference its documentation or
+ configure your EFI.
+
+Legacy LILO Instructions
+------------------------
+
+
+ - If you use LILO the kernel images are specified in the file /etc/lilo.conf.
+ The kernel image file is usually /vmlinuz, /boot/vmlinuz, /bzImage or
+ /boot/bzImage. To use the new kernel, save a copy of the old image and copy
+ the new image over the old one. Then, you MUST RERUN LILO to update the
+ loading map! If you don't, you won't be able to boot the new kernel image.
+
+ - Reinstalling LILO is usually a matter of running /sbin/lilo. You may wish
+ to edit /etc/lilo.conf to specify an entry for your old kernel image
+ (say, /vmlinux.old) in case the new one does not work. See the LILO docs
+ for more information.
+
+ - After reinstalling LILO, you should be all set. Shutdown the system,
reboot, and enjoy!
- If you ever need to change the default root device, video mode,
- etc. in the kernel image, use your bootloader's boot options
- where appropriate. No need to recompile the kernel to change
- these parameters.
+ - If you ever need to change the default root device, video mode, etc. in the
+ kernel image, use your bootloader's boot options where appropriate. No need
+ to recompile the kernel to change these parameters.
- Reboot with the new kernel and enjoy.
+
If something goes wrong
-----------------------
@@ -335,5 +356,5 @@ instructions at 'Documentation/admin-guide/reporting-issues.rst'.
Hints on understanding kernel bug reports are in
'Documentation/admin-guide/bug-hunting.rst'. More on debugging the kernel
-with gdb is in 'Documentation/dev-tools/gdb-kernel-debugging.rst' and
-'Documentation/dev-tools/kgdb.rst'.
+with gdb is in 'Documentation/process/debugging/gdb-kernel-debugging.rst' and
+'Documentation/process/debugging/kgdb.rst'.
diff --git a/Documentation/admin-guide/abi-obsolete-files.rst b/Documentation/admin-guide/abi-obsolete-files.rst
new file mode 100644
index 000000000000..3061a916b4b5
--- /dev/null
+++ b/Documentation/admin-guide/abi-obsolete-files.rst
@@ -0,0 +1,7 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Obsolete ABI Files
+==================
+
+.. kernel-abi:: obsolete
+ :no-symbols:
diff --git a/Documentation/admin-guide/abi-obsolete.rst b/Documentation/admin-guide/abi-obsolete.rst
index 594e697aa1b2..640f3903e847 100644
--- a/Documentation/admin-guide/abi-obsolete.rst
+++ b/Documentation/admin-guide/abi-obsolete.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
ABI obsolete symbols
====================
@@ -7,5 +9,5 @@ marked to be removed at some later point in time.
The description of the interface will document the reason why it is
obsolete and when it can be expected to be removed.
-.. kernel-abi:: ABI/obsolete
- :rst:
+.. kernel-abi:: obsolete
+ :no-files:
diff --git a/Documentation/admin-guide/abi-removed-files.rst b/Documentation/admin-guide/abi-removed-files.rst
new file mode 100644
index 000000000000..f1bdfadd2ec4
--- /dev/null
+++ b/Documentation/admin-guide/abi-removed-files.rst
@@ -0,0 +1,7 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Removed ABI Files
+=================
+
+.. kernel-abi:: removed
+ :no-symbols:
diff --git a/Documentation/admin-guide/abi-removed.rst b/Documentation/admin-guide/abi-removed.rst
index f9e000c81828..88832d3eacd6 100644
--- a/Documentation/admin-guide/abi-removed.rst
+++ b/Documentation/admin-guide/abi-removed.rst
@@ -1,5 +1,7 @@
+.. SPDX-License-Identifier: GPL-2.0
+
ABI removed symbols
===================
-.. kernel-abi:: ABI/removed
- :rst:
+.. kernel-abi:: removed
+ :no-files:
diff --git a/Documentation/admin-guide/abi-stable-files.rst b/Documentation/admin-guide/abi-stable-files.rst
new file mode 100644
index 000000000000..f867738fc178
--- /dev/null
+++ b/Documentation/admin-guide/abi-stable-files.rst
@@ -0,0 +1,7 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Stable ABI Files
+================
+
+.. kernel-abi:: stable
+ :no-symbols:
diff --git a/Documentation/admin-guide/abi-stable.rst b/Documentation/admin-guide/abi-stable.rst
index fc3361d847b1..528c68401f4b 100644
--- a/Documentation/admin-guide/abi-stable.rst
+++ b/Documentation/admin-guide/abi-stable.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
ABI stable symbols
==================
@@ -10,5 +12,5 @@ for at least 2 years.
Most interfaces (like syscalls) are expected to never change and always
be available.
-.. kernel-abi:: ABI/stable
- :rst:
+.. kernel-abi:: stable
+ :no-files:
diff --git a/Documentation/admin-guide/abi-testing-files.rst b/Documentation/admin-guide/abi-testing-files.rst
new file mode 100644
index 000000000000..1da868e42fdb
--- /dev/null
+++ b/Documentation/admin-guide/abi-testing-files.rst
@@ -0,0 +1,7 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Testing ABI Files
+=================
+
+.. kernel-abi:: testing
+ :no-symbols:
diff --git a/Documentation/admin-guide/abi-testing.rst b/Documentation/admin-guide/abi-testing.rst
index 19767926b344..6153ebd38e2d 100644
--- a/Documentation/admin-guide/abi-testing.rst
+++ b/Documentation/admin-guide/abi-testing.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
ABI testing symbols
===================
@@ -16,5 +18,5 @@ Programs that use these interfaces are strongly encouraged to add their
name to the description of these interfaces, so that the kernel
developers can easily notify them if any changes occur.
-.. kernel-abi:: ABI/testing
- :rst:
+.. kernel-abi:: testing
+ :no-files:
diff --git a/Documentation/admin-guide/abi.rst b/Documentation/admin-guide/abi.rst
index bcab3ef2597c..c6039359e585 100644
--- a/Documentation/admin-guide/abi.rst
+++ b/Documentation/admin-guide/abi.rst
@@ -1,7 +1,14 @@
+.. SPDX-License-Identifier: GPL-2.0
+
=====================
Linux ABI description
=====================
+.. kernel-abi:: README
+
+ABI symbols
+-----------
+
.. toctree::
:maxdepth: 2
@@ -9,3 +16,14 @@ Linux ABI description
abi-testing
abi-obsolete
abi-removed
+
+ABI files
+---------
+
+.. toctree::
+ :maxdepth: 2
+
+ abi-stable-files
+ abi-testing-files
+ abi-obsolete-files
+ abi-removed-files
diff --git a/Documentation/admin-guide/aoe/udev.txt b/Documentation/admin-guide/aoe/udev.txt
index 5fb756466bc7..d55ecb411c21 100644
--- a/Documentation/admin-guide/aoe/udev.txt
+++ b/Documentation/admin-guide/aoe/udev.txt
@@ -2,7 +2,7 @@
# They may be installed along the following lines. Check the section
# 8 udev manpage to see whether your udev supports SUBSYSTEM, and
# whether it uses one or two equal signs for SUBSYSTEM and KERNEL.
-#
+#
# ecashin@makki ~$ su
# Password:
# bash# find /etc -type f -name udev.conf
@@ -13,7 +13,7 @@
# 10-wacom.rules 50-udev.rules
# bash# cp /path/to/linux/Documentation/admin-guide/aoe/udev.txt \
# /etc/udev/rules.d/60-aoe.rules
-#
+#
# aoe char devices
SUBSYSTEM=="aoe", KERNEL=="discover", NAME="etherd/%k", GROUP="disk", MODE="0220"
@@ -22,5 +22,5 @@ SUBSYSTEM=="aoe", KERNEL=="interfaces", NAME="etherd/%k", GROUP="disk", MODE="02
SUBSYSTEM=="aoe", KERNEL=="revalidate", NAME="etherd/%k", GROUP="disk", MODE="0220"
SUBSYSTEM=="aoe", KERNEL=="flush", NAME="etherd/%k", GROUP="disk", MODE="0220"
-# aoe block devices
+# aoe block devices
KERNEL=="etherd*", GROUP="disk"
diff --git a/Documentation/admin-guide/bcache.rst b/Documentation/admin-guide/bcache.rst
index 6fdb495ac466..f71f349553e4 100644
--- a/Documentation/admin-guide/bcache.rst
+++ b/Documentation/admin-guide/bcache.rst
@@ -17,8 +17,7 @@ The latest bcache kernel code can be found from mainline Linux kernel:
It's designed around the performance characteristics of SSDs - it only allocates
in erase block sized buckets, and it uses a hybrid btree/log to track cached
extents (which can be anywhere from a single sector to the bucket size). It's
-designed to avoid random writes at all costs; it fills up an erase block
-sequentially, then issues a discard before reusing it.
+designed to avoid random writes at all costs.
Both writethrough and writeback caching are supported. Writeback defaults to
off, but can be switched on and off arbitrarily at runtime. Bcache goes to
@@ -618,19 +617,11 @@ bucket_size
cache_replacement_policy
One of either lru, fifo or random.
-discard
- Boolean; if on a discard/TRIM will be issued to each bucket before it is
- reused. Defaults to off, since SATA TRIM is an unqueued command (and thus
- slow).
-
freelist_percent
Size of the freelist as a percentage of nbuckets. Can be written to to
increase the number of buckets kept on the freelist, which lets you
artificially reduce the size of the cache at runtime. Mostly for testing
- purposes (i.e. testing how different size caches affect your hit rate), but
- since buckets are discarded when they move on to the freelist will also make
- the SSD's garbage collection easier by effectively giving it more reserved
- space.
+ purposes (i.e. testing how different size caches affect your hit rate).
io_errors
Number of errors that have occurred, decayed by io_error_halflife.
diff --git a/Documentation/admin-guide/blockdev/index.rst b/Documentation/admin-guide/blockdev/index.rst
index 957ccf617797..3262397ebe8f 100644
--- a/Documentation/admin-guide/blockdev/index.rst
+++ b/Documentation/admin-guide/blockdev/index.rst
@@ -11,6 +11,7 @@ Block Devices
nbd
paride
ramdisk
+ zoned_loop
zram
drbd/index
diff --git a/Documentation/admin-guide/blockdev/paride.rst b/Documentation/admin-guide/blockdev/paride.rst
index e85ad37cc0e5..b2f627d4c2f8 100644
--- a/Documentation/admin-guide/blockdev/paride.rst
+++ b/Documentation/admin-guide/blockdev/paride.rst
@@ -118,7 +118,7 @@ and high-level drivers that you would use:
================ ============ ========
All parports and all protocol drivers are probed automatically unless probe=0
-parameter is used. So just "modprobe epat" is enough for a Imation SuperDisk
+parameter is used. So just "modprobe epat" is enough for an Imation SuperDisk
drive to work.
Manual device creation::
diff --git a/Documentation/admin-guide/blockdev/zoned_loop.rst b/Documentation/admin-guide/blockdev/zoned_loop.rst
new file mode 100644
index 000000000000..806adde664db
--- /dev/null
+++ b/Documentation/admin-guide/blockdev/zoned_loop.rst
@@ -0,0 +1,182 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=======================
+Zoned Loop Block Device
+=======================
+
+.. Contents:
+
+ 1) Overview
+ 2) Creating a Zoned Device
+ 3) Deleting a Zoned Device
+ 4) Example
+
+
+1) Overview
+-----------
+
+The zoned loop block device driver (zloop) allows a user to create a zoned block
+device using one regular file per zone as backing storage. This driver does not
+directly control any hardware and uses read, write and truncate operations to
+regular files of a file system to emulate a zoned block device.
+
+Using zloop, zoned block devices with a configurable capacity, zone size and
+number of conventional zones can be created. The storage for each zone of the
+device is implemented using a regular file with a maximum size equal to the zone
+size. The size of a file backing a conventional zone is always equal to the zone
+size. The size of a file backing a sequential zone indicates the amount of data
+sequentially written to the file, that is, the size of the file directly
+indicates the position of the write pointer of the zone.
+
+When resetting a sequential zone, its backing file size is truncated to zero.
+Conversely, for a zone finish operation, the backing file is truncated to the
+zone size. With this, the maximum capacity of a zloop zoned block device created
+can be larger configured to be larger than the storage space available on the
+backing file system. Of course, for such configuration, writing more data than
+the storage space available on the backing file system will result in write
+errors.
+
+The zoned loop block device driver implements a complete zone transition state
+machine. That is, zones can be empty, implicitly opened, explicitly opened,
+closed or full. The current implementation does not support any limits on the
+maximum number of open and active zones.
+
+No user tools are necessary to create and delete zloop devices.
+
+2) Creating a Zoned Device
+--------------------------
+
+Once the zloop module is loaded (or if zloop is compiled in the kernel), the
+character device file /dev/zloop-control can be used to add a zloop device.
+This is done by writing an "add" command directly to the /dev/zloop-control
+device::
+
+ $ modprobe zloop
+ $ ls -l /dev/zloop*
+ crw-------. 1 root root 10, 123 Jan 6 19:18 /dev/zloop-control
+
+ $ mkdir -p <base directory/<device ID>
+ $ echo "add [options]" > /dev/zloop-control
+
+The options available for the add command can be listed by reading the
+/dev/zloop-control device::
+
+ $ cat /dev/zloop-control
+ add id=%d,capacity_mb=%u,zone_size_mb=%u,zone_capacity_mb=%u,conv_zones=%u,base_dir=%s,nr_queues=%u,queue_depth=%u,buffered_io
+ remove id=%d
+
+In more details, the options that can be used with the "add" command are as
+follows.
+
+=================== =========================================================
+id Device number (the X in /dev/zloopX).
+ Default: automatically assigned.
+capacity_mb Device total capacity in MiB. This is always rounded up
+ to the nearest higher multiple of the zone size.
+ Default: 16384 MiB (16 GiB).
+zone_size_mb Device zone size in MiB. Default: 256 MiB.
+zone_capacity_mb Device zone capacity (must always be equal to or lower
+ than the zone size. Default: zone size.
+conv_zones Total number of conventioanl zones starting from
+ sector 0
+ Default: 8
+base_dir Path to the base directory where to create the directory
+ containing the zone files of the device.
+ Default=/var/local/zloop.
+ The device directory containing the zone files is always
+ named with the device ID. E.g. the default zone file
+ directory for /dev/zloop0 is /var/local/zloop/0.
+nr_queues Number of I/O queues of the zoned block device. This
+ value is always capped by the number of online CPUs
+ Default: 1
+queue_depth Maximum I/O queue depth per I/O queue.
+ Default: 64
+buffered_io Do buffered IOs instead of direct IOs (default: false)
+zone_append Enable or disable a zloop device native zone append
+ support.
+ Default: 1 (enabled).
+ If native zone append support is disabled, the block layer
+ will emulate this operation using regular write
+ operations.
+ordered_zone_append Enable zloop mitigation of zone append reordering.
+ Default: disabled.
+ This is useful for testing file systems file data mapping
+ (extents), as when enabled, this can significantly reduce
+ the number of data extents needed to for a file data
+ mapping.
+=================== =========================================================
+
+3) Deleting a Zoned Device
+--------------------------
+
+Deleting an unused zoned loop block device is done by issuing the "remove"
+command to /dev/zloop-control, specifying the ID of the device to remove::
+
+ $ echo "remove id=X" > /dev/zloop-control
+
+The remove command does not have any option.
+
+A zoned device that was removed can be re-added again without any change to the
+state of the device zones: the device zones are restored to their last state
+before the device was removed. Adding again a zoned device after it was removed
+must always be done using the same configuration as when the device was first
+added. If a zone configuration change is detected, an error will be returned and
+the zoned device will not be created.
+
+To fully delete a zoned device, after executing the remove operation, the device
+base directory containing the backing files of the device zones must be deleted.
+
+4) Example
+----------
+
+The following sequence of commands creates a 2GB zoned device with zones of 64
+MB and a zone capacity of 63 MB::
+
+ $ modprobe zloop
+ $ mkdir -p /var/local/zloop/0
+ $ echo "add capacity_mb=2048,zone_size_mb=64,zone_capacity=63MB" > /dev/zloop-control
+
+For the device created (/dev/zloop0), the zone backing files are all created
+under the default base directory (/var/local/zloop)::
+
+ $ ls -l /var/local/zloop/0
+ total 0
+ -rw-------. 1 root root 67108864 Jan 6 22:23 cnv-000000
+ -rw-------. 1 root root 67108864 Jan 6 22:23 cnv-000001
+ -rw-------. 1 root root 67108864 Jan 6 22:23 cnv-000002
+ -rw-------. 1 root root 67108864 Jan 6 22:23 cnv-000003
+ -rw-------. 1 root root 67108864 Jan 6 22:23 cnv-000004
+ -rw-------. 1 root root 67108864 Jan 6 22:23 cnv-000005
+ -rw-------. 1 root root 67108864 Jan 6 22:23 cnv-000006
+ -rw-------. 1 root root 67108864 Jan 6 22:23 cnv-000007
+ -rw-------. 1 root root 0 Jan 6 22:23 seq-000008
+ -rw-------. 1 root root 0 Jan 6 22:23 seq-000009
+ ...
+
+The zoned device created (/dev/zloop0) can then be used normally::
+
+ $ lsblk -z
+ NAME ZONED ZONE-SZ ZONE-NR ZONE-AMAX ZONE-OMAX ZONE-APP ZONE-WGRAN
+ zloop0 host-managed 64M 32 0 0 1M 4K
+ $ blkzone report /dev/zloop0
+ start: 0x000000000, len 0x020000, cap 0x020000, wptr 0x000000 reset:0 non-seq:0, zcond: 0(nw) [type: 1(CONVENTIONAL)]
+ start: 0x000020000, len 0x020000, cap 0x020000, wptr 0x000000 reset:0 non-seq:0, zcond: 0(nw) [type: 1(CONVENTIONAL)]
+ start: 0x000040000, len 0x020000, cap 0x020000, wptr 0x000000 reset:0 non-seq:0, zcond: 0(nw) [type: 1(CONVENTIONAL)]
+ start: 0x000060000, len 0x020000, cap 0x020000, wptr 0x000000 reset:0 non-seq:0, zcond: 0(nw) [type: 1(CONVENTIONAL)]
+ start: 0x000080000, len 0x020000, cap 0x020000, wptr 0x000000 reset:0 non-seq:0, zcond: 0(nw) [type: 1(CONVENTIONAL)]
+ start: 0x0000a0000, len 0x020000, cap 0x020000, wptr 0x000000 reset:0 non-seq:0, zcond: 0(nw) [type: 1(CONVENTIONAL)]
+ start: 0x0000c0000, len 0x020000, cap 0x020000, wptr 0x000000 reset:0 non-seq:0, zcond: 0(nw) [type: 1(CONVENTIONAL)]
+ start: 0x0000e0000, len 0x020000, cap 0x020000, wptr 0x000000 reset:0 non-seq:0, zcond: 0(nw) [type: 1(CONVENTIONAL)]
+ start: 0x000100000, len 0x020000, cap 0x01f800, wptr 0x000000 reset:0 non-seq:0, zcond: 1(em) [type: 2(SEQ_WRITE_REQUIRED)]
+ start: 0x000120000, len 0x020000, cap 0x01f800, wptr 0x000000 reset:0 non-seq:0, zcond: 1(em) [type: 2(SEQ_WRITE_REQUIRED)]
+ ...
+
+Deleting this device is done using the command::
+
+ $ echo "remove id=0" > /dev/zloop-control
+
+The removed device can be re-added again using the same "add" command as when
+the device was first created. To fully delete a zoned device, its backing files
+should also be deleted after executing the remove command::
+
+ $ rm -r /var/local/zloop/0
diff --git a/Documentation/admin-guide/blockdev/zram.rst b/Documentation/admin-guide/blockdev/zram.rst
index ee2b0030d416..3e273c1bb749 100644
--- a/Documentation/admin-guide/blockdev/zram.rst
+++ b/Documentation/admin-guide/blockdev/zram.rst
@@ -47,12 +47,14 @@ The list of possible return codes:
-ENOMEM zram was not able to allocate enough memory to fulfil your
needs.
-EINVAL invalid input has been provided.
+-EAGAIN re-try operation later (e.g. when attempting to run recompress
+ and writeback simultaneously).
======== =============================================================
If you use 'echo', the returned value is set by the 'echo' utility,
and, in general case, something like::
- echo 3 > /sys/block/zram0/max_comp_streams
+ echo foo > /sys/block/zram0/comp_algorithm
if [ $? -ne 0 ]; then
handle_error
fi
@@ -71,21 +73,7 @@ This creates 4 devices: /dev/zram{0,1,2,3}
num_devices parameter is optional and tells zram how many devices should be
pre-created. Default: 1.
-2) Set max number of compression streams
-========================================
-
-Regardless of the value passed to this attribute, ZRAM will always
-allocate multiple compression streams - one per online CPU - thus
-allowing several concurrent compression operations. The number of
-allocated compression streams goes down when some of the CPUs
-become offline. There is no single-compression-stream mode anymore,
-unless you are running a UP system or have only 1 CPU online.
-
-To find out how many streams are currently available::
-
- cat /sys/block/zram0/max_comp_streams
-
-3) Select compression algorithm
+2) Select compression algorithm
===============================
Using comp_algorithm device attribute one can see available and
@@ -102,15 +90,39 @@ Examples::
#select lzo compression algorithm
echo lzo > /sys/block/zram0/comp_algorithm
-For the time being, the `comp_algorithm` content does not necessarily
-show every compression algorithm supported by the kernel. We keep this
-list primarily to simplify device configuration and one can configure
-a new device with a compression algorithm that is not listed in
-`comp_algorithm`. The thing is that, internally, ZRAM uses Crypto API
-and, if some of the algorithms were built as modules, it's impossible
-to list all of them using, for instance, /proc/crypto or any other
-method. This, however, has an advantage of permitting the usage of
-custom crypto compression modules (implementing S/W or H/W compression).
+For the time being, the `comp_algorithm` content shows only compression
+algorithms that are supported by zram.
+
+3) Set compression algorithm parameters: Optional
+=================================================
+
+Compression algorithms may support specific parameters which can be
+tweaked for particular dataset. ZRAM has an `algorithm_params` device
+attribute which provides a per-algorithm params configuration.
+
+For example, several compression algorithms support `level` parameter.
+In addition, certain compression algorithms support pre-trained dictionaries,
+which significantly change algorithms' characteristics. In order to configure
+compression algorithm to use external pre-trained dictionary, pass full
+path to the `dict` along with other parameters::
+
+ #pass path to pre-trained zstd dictionary
+ echo "algo=zstd dict=/etc/dictionary" > /sys/block/zram0/algorithm_params
+
+ #same, but using algorithm priority
+ echo "priority=1 dict=/etc/dictionary" > \
+ /sys/block/zram0/algorithm_params
+
+ #pass path to pre-trained zstd dictionary and compression level
+ echo "algo=zstd level=8 dict=/etc/dictionary" > \
+ /sys/block/zram0/algorithm_params
+
+Parameters are algorithm specific: not all algorithms support pre-trained
+dictionaries, not all algorithms support `level`. Furthermore, for certain
+algorithms `level` controls the compression level (the higher the value the
+better the compression ratio, it even can take negatives values for some
+algorithms), for other algorithms `level` is acceleration level (the higher
+the value the lower the compression ratio).
4) Set Disksize
===============
@@ -202,9 +214,8 @@ mem_limit WO specifies the maximum amount of memory ZRAM can
writeback_limit WO specifies the maximum amount of write IO zram
can write out to backing device as 4KB unit
writeback_limit_enable RW show and set writeback_limit feature
-max_comp_streams RW the number of possible concurrent compress
- operations
comp_algorithm RW show and change the compression algorithm
+algorithm_params WO setup compression algorithm parameters
compact WO trigger memory compaction
debug_stat RO this file is used for zram debugging purposes
backing_dev RW set up backend storage for zram to write out
@@ -284,7 +295,7 @@ a single line of text and contains the following stats separated by whitespace:
============== =============================================================
9) Deactivate
-=============
+==============
::
@@ -306,6 +317,26 @@ a single line of text and contains the following stats separated by whitespace:
Optional Feature
================
+IDLE pages tracking
+-------------------
+
+zram has built-in support for idle pages tracking (that is, allocated but
+not used pages). This feature is useful for e.g. zram writeback and
+recompression. In order to mark pages as idle, execute the following command::
+
+ echo all > /sys/block/zramX/idle
+
+This will mark all allocated zram pages as idle. The idle mark will be
+removed only when the page (block) is accessed (e.g. overwritten or freed).
+Additionally, when CONFIG_ZRAM_TRACK_ENTRY_ACTIME is enabled, pages can be
+marked as idle based on how many seconds have passed since the last access to
+a particular zram page::
+
+ echo 86400 > /sys/block/zramX/idle
+
+In this example, all pages which haven't been accessed in more than 86400
+seconds (one day) will be marked idle.
+
writeback
---------
@@ -320,24 +351,7 @@ If admin wants to use incompressible page writeback, they could do it via::
echo huge > /sys/block/zramX/writeback
-To use idle page writeback, first, user need to declare zram pages
-as idle::
-
- echo all > /sys/block/zramX/idle
-
-From now on, any pages on zram are idle pages. The idle mark
-will be removed until someone requests access of the block.
-IOW, unless there is access request, those pages are still idle pages.
-Additionally, when CONFIG_ZRAM_TRACK_ENTRY_ACTIME is enabled pages can be
-marked as idle based on how long (in seconds) it's been since they were
-last accessed::
-
- echo 86400 > /sys/block/zramX/idle
-
-In this example all pages which haven't been accessed in more than 86400
-seconds (one day) will be marked idle.
-
-Admin can request writeback of those idle pages at right timing via::
+Admin can request writeback of idle pages at right timing via::
echo idle > /sys/block/zramX/writeback
@@ -358,6 +372,23 @@ they could write a page index into the interface::
echo "page_index=1251" > /sys/block/zramX/writeback
+In Linux 6.16 this interface underwent some rework. First, the interface
+now supports `key=value` format for all of its parameters (`type=huge_idle`,
+etc.) Second, the support for `page_indexes` was introduced, which specify
+`LOW-HIGH` range (or ranges) of pages to be written-back. This reduces the
+number of syscalls, but more importantly this enables optimal post-processing
+target selection strategy. Usage example::
+
+ echo "type=idle" > /sys/block/zramX/writeback
+ echo "page_indexes=1-100 page_indexes=200-300" > \
+ /sys/block/zramX/writeback
+
+We also now permit multiple page_index params per call and a mix of
+single pages and page ranges::
+
+ echo page_index=42 page_index=99 page_indexes=100-200 \
+ page_indexes=500-700 > /sys/block/zramX/writeback
+
If there are lots of write IO with flash device, potentially, it has
flash wearout problem so that admin needs to design write limitation
to guarantee storage health for entire product life.
@@ -466,7 +497,10 @@ of equal or greater size:::
#recompress idle pages larger than 2000 bytes
echo "type=idle threshold=2000" > /sys/block/zramX/recompress
-Recompression of idle pages requires memory tracking.
+It is also possible to limit the number of pages zram re-compression will
+attempt to recompress:::
+
+ echo "type=huge_idle max_pages=42" > /sys/block/zramX/recompress
During re-compression for every page, that matches re-compression criteria,
ZRAM iterates the list of registered alternative compression algorithms in
@@ -482,11 +516,14 @@ registered compression algorithms, increases our chances of finding the
algorithm that successfully compresses a particular page. Sometimes, however,
it is convenient (and sometimes even necessary) to limit recompression to
only one particular algorithm so that it will not try any other algorithms.
-This can be achieved by providing a algo=NAME parameter:::
+This can be achieved by providing a `algo` or `priority` parameter:::
#use zstd algorithm only (if registered)
echo "type=huge algo=zstd" > /sys/block/zramX/recompress
+ #use zstd algorithm only (if zstd was registered under priority 1)
+ echo "type=huge priority=1" > /sys/block/zramX/recompress
+
memory tracking
===============
diff --git a/Documentation/admin-guide/bootconfig.rst b/Documentation/admin-guide/bootconfig.rst
index 91339efdcb54..7a86042c9b6d 100644
--- a/Documentation/admin-guide/bootconfig.rst
+++ b/Documentation/admin-guide/bootconfig.rst
@@ -265,7 +265,7 @@ The final kernel cmdline will be the following::
Config File Limitation
======================
-Currently the maximum config size size is 32KB and the total key-words (not
+Currently the maximum config size is 32KB and the total key-words (not
key-value entries) must be under 1024 nodes.
Note: this is not the number of entries but nodes, an entry must consume
more than 2 nodes (a key-word and a value). So theoretically, it will be
diff --git a/Documentation/admin-guide/braille-console.rst b/Documentation/admin-guide/braille-console.rst
index 18e79337dcfd..153472e93cae 100644
--- a/Documentation/admin-guide/braille-console.rst
+++ b/Documentation/admin-guide/braille-console.rst
@@ -21,8 +21,8 @@ override the baud rate to 115200, etc.
By default, the braille device will just show the last kernel message (console
mode). To review previous messages, press the Insert key to switch to the VT
review mode. In review mode, the arrow keys permit to browse in the VT content,
-:kbd:`PAGE-UP`/:kbd:`PAGE-DOWN` keys go at the top/bottom of the screen, and
-the :kbd:`HOME` key goes back
+`PAGE-UP`/`PAGE-DOWN` keys go at the top/bottom of the screen, and
+the `HOME` key goes back
to the cursor, hence providing very basic screen reviewing facility.
Sound feedback can be obtained by adding the ``braille_console.sound=1`` kernel
diff --git a/Documentation/admin-guide/bug-bisect.rst b/Documentation/admin-guide/bug-bisect.rst
index 325c5d0ed34a..f4f867cabb17 100644
--- a/Documentation/admin-guide/bug-bisect.rst
+++ b/Documentation/admin-guide/bug-bisect.rst
@@ -1,76 +1,165 @@
-Bisecting a bug
-+++++++++++++++
+.. SPDX-License-Identifier: (GPL-2.0+ OR CC-BY-4.0)
+.. [see the bottom of this file for redistribution information]
-Last updated: 28 October 2016
+======================
+Bisecting a regression
+======================
-Introduction
-============
+This document describes how to use a ``git bisect`` to find the source code
+change that broke something -- for example when some functionality stopped
+working after upgrading from Linux 6.0 to 6.1.
-Always try the latest kernel from kernel.org and build from source. If you are
-not confident in doing that please report the bug to your distribution vendor
-instead of to a kernel developer.
+The text focuses on the gist of the process. If you are new to bisecting the
+kernel, better follow Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst
+instead: it depicts everything from start to finish while covering multiple
+aspects even kernel developers occasionally forget. This includes detecting
+situations early where a bisection would be a waste of time, as nobody would
+care about the result -- for example, because the problem happens after the
+kernel marked itself as 'tainted', occurs in an abandoned version, was already
+fixed, or is caused by a .config change you or your Linux distributor performed.
-Finding bugs is not always easy. Have a go though. If you can't find it don't
-give up. Report as much as you have found to the relevant maintainer. See
-MAINTAINERS for who that is for the subsystem you have worked on.
+Finding the change causing a kernel issue using a bisection
+===========================================================
-Before you submit a bug report read
-'Documentation/admin-guide/reporting-issues.rst'.
+*Note: the following process assumes you prepared everything for a bisection.
+This includes having a Git clone with the appropriate sources, installing the
+software required to build and install kernels, as well as a .config file stored
+in a safe place (the following example assumes '~/prepared_kernel_.config') to
+use as pristine base at each bisection step; ideally, you have also worked out
+a fully reliable and straight-forward way to reproduce the regression, too.*
-Devices not appearing
-=====================
+* Preparation: start the bisection and tell Git about the points in the history
+ you consider to be working and broken, which Git calls 'good' and 'bad'::
-Often this is caused by udev/systemd. Check that first before blaming it
-on the kernel.
+ git bisect start
+ git bisect good v6.0
+ git bisect bad v6.1
-Finding patch that caused a bug
-===============================
+ Instead of Git tags like 'v6.0' and 'v6.1' you can specify commit-ids, too.
-Using the provided tools with ``git`` makes finding bugs easy provided the bug
-is reproducible.
+1. Copy your prepared .config into the build directory and adjust it to the
+ needs of the codebase Git checked out for testing::
-Steps to do it:
+ cp ~/prepared_kernel_.config .config
+ make olddefconfig
-- build the Kernel from its git source
-- start bisect with [#f1]_::
-
- $ git bisect start
-
-- mark the broken changeset with::
-
- $ git bisect bad [commit]
-
-- mark a changeset where the code is known to work with::
-
- $ git bisect good [commit]
-
-- rebuild the Kernel and test
-- interact with git bisect by using either::
-
- $ git bisect good
-
- or::
-
- $ git bisect bad
-
- depending if the bug happened on the changeset you're testing
-- After some interactions, git bisect will give you the changeset that
- likely caused the bug.
-
-- For example, if you know that the current version is bad, and version
- 4.8 is good, you could do::
-
- $ git bisect start
- $ git bisect bad # Current version is bad
- $ git bisect good v4.8
-
-
-.. [#f1] You can, optionally, provide both good and bad arguments at git
- start with ``git bisect start [BAD] [GOOD]``
-
-For further references, please read:
-
-- The man page for ``git-bisect``
-- `Fighting regressions with git bisect <https://www.kernel.org/pub/software/scm/git/docs/git-bisect-lk2009.html>`_
-- `Fully automated bisecting with "git bisect run" <https://lwn.net/Articles/317154>`_
-- `Using Git bisect to figure out when brokenness was introduced <http://webchick.net/node/99>`_
+2. Now build, install, and boot a kernel. This might fail for unrelated reasons,
+ for example, when a compile error happens at the current stage of the
+ bisection a later change resolves. In such cases run ``git bisect skip`` and
+ go back to step 1.
+
+3. Check if the functionality that regressed works in the kernel you just built.
+
+ If it works, execute::
+
+ git bisect good
+
+ If it is broken, run::
+
+ git bisect bad
+
+ Note, getting this wrong just once will send the rest of the bisection
+ totally off course. To prevent having to start anew later you thus want to
+ ensure what you tell Git is correct; it is thus often wise to spend a few
+ minutes more on testing in case your reproducer is unreliable.
+
+ After issuing one of these two commands, Git will usually check out another
+ bisection point and print something like 'Bisecting: 675 revisions left to
+ test after this (roughly 10 steps)'. In that case go back to step 1.
+
+ If Git instead prints something like 'cafecaca0c0dacafecaca0c0dacafecaca0c0da
+ is the first bad commit', then you have finished the bisection. In that case
+ move to the next point below. Note, right after displaying that line Git will
+ show some details about the culprit including its patch description; this can
+ easily fill your terminal, so you might need to scroll up to see the message
+ mentioning the culprit's commit-id.
+
+ In case you missed Git's output, you can always run ``git bisect log`` to
+ print the status: it will show how many steps remain or mention the result of
+ the bisection.
+
+* Recommended complementary task: put the bisection log and the current .config
+ file aside for the bug report; furthermore tell Git to reset the sources to
+ the state before the bisection::
+
+ git bisect log > ~/bisection-log
+ cp .config ~/bisection-config-culprit
+ git bisect reset
+
+* Recommended optional task: try reverting the culprit on top of the latest
+ codebase and check if that fixes your bug; if that is the case, it validates
+ the bisection and enables developers to resolve the regression through a
+ revert.
+
+ To try this, update your clone and check out latest mainline. Then tell Git
+ to revert the change by specifying its commit-id::
+
+ git revert --no-edit cafec0cacaca0
+
+ Git might reject this, for example when the bisection landed on a merge
+ commit. In that case, abandon the attempt. Do the same, if Git fails to revert
+ the culprit on its own because later changes depend on it -- at least unless
+ you bisected a stable or longterm kernel series, in which case you want to
+ check out its latest codebase and try a revert there.
+
+ If a revert succeeds, build and test another kernel to check if reverting
+ resolved your regression.
+
+With that the process is complete. Now report the regression as described by
+Documentation/admin-guide/reporting-issues.rst.
+
+Bisecting linux-next
+--------------------
+
+If you face a problem only happening in linux-next, bisect between the
+linux-next branches 'stable' and 'master'. The following commands will start
+the process for a linux-next tree you added as a remote called 'next'::
+
+ git bisect start
+ git bisect good next/stable
+ git bisect bad next/master
+
+The 'stable' branch refers to the state of linux-mainline that the current
+linux-next release (found in the 'master' branch) is based on -- the former
+thus should be free of any problems that show up in -next, but not in Linus'
+tree.
+
+This will bisect across a wide range of changes, some of which you might have
+used in earlier linux-next releases without problems. Sadly there is no simple
+way to avoid checking them: bisecting from one linux-next release to a later
+one (say between 'next-20241020' and 'next-20241021') is impossible, as they
+share no common history.
+
+Additional reading material
+---------------------------
+
+* The `man page for 'git bisect' <https://git-scm.com/docs/git-bisect>`_ and
+ `fighting regressions with 'git bisect' <https://git-scm.com/docs/git-bisect-lk2009.html>`_
+ in the Git documentation.
+* `Working with git bisect <https://nathanchance.dev/posts/working-with-git-bisect/>`_
+ from kernel developer Nathan Chancellor.
+* `Using Git bisect to figure out when brokenness was introduced <http://webchick.net/node/99>`_.
+* `Fully automated bisecting with 'git bisect run' <https://lwn.net/Articles/317154>`_.
+
+..
+ end-of-content
+..
+ This document is maintained by Thorsten Leemhuis <linux@leemhuis.info>. If
+ you spot a typo or small mistake, feel free to let him know directly and
+ he'll fix it. You are free to do the same in a mostly informal way if you
+ want to contribute changes to the text -- but for copyright reasons please CC
+ linux-doc@vger.kernel.org and 'sign-off' your contribution as
+ Documentation/process/submitting-patches.rst explains in the section 'Sign
+ your work - the Developer's Certificate of Origin'.
+..
+ This text is available under GPL-2.0+ or CC-BY-4.0, as stated at the top
+ of the file. If you want to distribute this text under CC-BY-4.0 only,
+ please use 'The Linux kernel development community' for author attribution
+ and link this as source:
+ https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/Documentation/admin-guide/bug-bisect.rst
+
+..
+ Note: Only the content of this RST file as found in the Linux kernel sources
+ is available under CC-BY-4.0, as versions of this text that were processed
+ (for example by the kernel's build system) might contain content taken from
+ files which use a more restrictive license.
diff --git a/Documentation/admin-guide/bug-hunting.rst b/Documentation/admin-guide/bug-hunting.rst
index 95299b08c405..7da0504388ec 100644
--- a/Documentation/admin-guide/bug-hunting.rst
+++ b/Documentation/admin-guide/bug-hunting.rst
@@ -196,7 +196,7 @@ will see the assembler code for the routine shown, but if your kernel has
debug symbols the C code will also be available. (Debug symbols can be enabled
in the kernel hacking menu of the menu configuration.) For example::
- $ objdump -r -S -l --disassemble net/dccp/ipv4.o
+ $ objdump -r -S -l --disassemble net/ipv4/tcp.o
.. note::
@@ -244,15 +244,15 @@ Reporting the bug
Once you find where the bug happened, by inspecting its location,
you could either try to fix it yourself or report it upstream.
-In order to report it upstream, you should identify the mailing list
-used for the development of the affected code. This can be done by using
-the ``get_maintainer.pl`` script.
+In order to report it upstream, you should identify the bug tracker, if any, or
+mailing list used for the development of the affected code. This can be done by
+using the ``get_maintainer.pl`` script.
For example, if you find a bug at the gspca's sonixj.c file, you can get
its maintainers with::
- $ ./scripts/get_maintainer.pl -f drivers/media/usb/gspca/sonixj.c
- Hans Verkuil <hverkuil@xs4all.nl> (odd fixer:GSPCA USB WEBCAM DRIVER,commit_signer:1/1=100%)
+ $ ./scripts/get_maintainer.pl --bug -f drivers/media/usb/gspca/sonixj.c
+ Hans Verkuil <hverkuil@kernel.org> (odd fixer:GSPCA USB WEBCAM DRIVER,commit_signer:1/1=100%)
Mauro Carvalho Chehab <mchehab@kernel.org> (maintainer:MEDIA INPUT INFRASTRUCTURE (V4L/DVB),commit_signer:1/1=100%)
Tejun Heo <tj@kernel.org> (commit_signer:1/1=100%)
Bhaktipriya Shridhar <bhaktipriya96@gmail.com> (commit_signer:1/1=100%,authored:1/1=100%,added_lines:4/4=100%,removed_lines:9/9=100%)
@@ -267,11 +267,12 @@ Please notice that it will point to:
- The driver maintainer (Hans Verkuil);
- The subsystem maintainer (Mauro Carvalho Chehab);
- The driver and/or subsystem mailing list (linux-media@vger.kernel.org);
-- the Linux Kernel mailing list (linux-kernel@vger.kernel.org).
+- The Linux Kernel mailing list (linux-kernel@vger.kernel.org);
+- The bug reporting URIs for the driver/subsystem (none in the above example).
-Usually, the fastest way to have your bug fixed is to report it to mailing
-list used for the development of the code (linux-media ML) copying the
-driver maintainer (Hans).
+If the listing contains bug reporting URIs at the end, please prefer them over
+email. Otherwise, please report bugs to the mailing list used for the
+development of the code (linux-media ML) copying the driver maintainer (Hans).
If you are totally stumped as to whom to send the report, and
``get_maintainer.pl`` didn't provide you anything useful, send it to
@@ -367,12 +368,3 @@ processed by ``klogd``::
Aug 29 09:51:01 blizard kernel: Call Trace: [oops:_oops_ioctl+48/80] [_sys_ioctl+254/272] [_system_call+82/128]
Aug 29 09:51:01 blizard kernel: Code: c7 00 05 00 00 00 eb 08 90 90 90 90 90 90 90 90 89 ec 5d c3
----------------------------------------------------------------------------
-
-::
-
- Dr. G.W. Wettstein Oncology Research Div. Computing Facility
- Roger Maris Cancer Center INTERNET: greg@wind.rmcc.com
- 820 4th St. N.
- Fargo, ND 58122
- Phone: 701-234-7556
diff --git a/Documentation/admin-guide/cgroup-v1/cgroups.rst b/Documentation/admin-guide/cgroup-v1/cgroups.rst
index 9343148ee993..463f98453323 100644
--- a/Documentation/admin-guide/cgroup-v1/cgroups.rst
+++ b/Documentation/admin-guide/cgroup-v1/cgroups.rst
@@ -13,7 +13,7 @@ Portions Copyright (c) 2004-2006 Silicon Graphics, Inc.
Modified by Paul Jackson <pj@sgi.com>
-Modified by Christoph Lameter <cl@linux.com>
+Modified by Christoph Lameter <cl@gentwo.org>
.. CONTENTS:
@@ -570,7 +570,7 @@ visible to cgroup_for_each_child/descendant_*() iterators. The
subsystem may choose to fail creation by returning -errno. This
callback can be used to implement reliable state sharing and
propagation along the hierarchy. See the comment on
-cgroup_for_each_descendant_pre() for details.
+cgroup_for_each_live_descendant_pre() for details.
``void css_offline(struct cgroup *cgrp);``
(cgroup_mutex held by caller)
diff --git a/Documentation/admin-guide/cgroup-v1/cpusets.rst b/Documentation/admin-guide/cgroup-v1/cpusets.rst
index ae646d621a8a..c7909e5ac136 100644
--- a/Documentation/admin-guide/cgroup-v1/cpusets.rst
+++ b/Documentation/admin-guide/cgroup-v1/cpusets.rst
@@ -10,7 +10,7 @@ Written by Simon.Derr@bull.net
- Portions Copyright (c) 2004-2006 Silicon Graphics, Inc.
- Modified by Paul Jackson <pj@sgi.com>
-- Modified by Christoph Lameter <cl@linux.com>
+- Modified by Christoph Lameter <cl@gentwo.org>
- Modified by Paul Menage <menage@google.com>
- Modified by Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
@@ -179,7 +179,7 @@ files describing that cpuset:
- cpuset.mem_hardwall flag: is memory allocation hardwalled
- cpuset.memory_pressure: measure of how much paging pressure in cpuset
- cpuset.memory_spread_page flag: if set, spread page cache evenly on allowed nodes
- - cpuset.memory_spread_slab flag: if set, spread slab cache evenly on allowed nodes
+ - cpuset.memory_spread_slab flag: OBSOLETE. Doesn't have any function.
- cpuset.sched_load_balance flag: if set, load balance within CPUs on that cpuset
- cpuset.sched_relax_domain_level: the searching range when migrating tasks
@@ -568,7 +568,7 @@ on the next tick. For some applications in special situation, waiting
The 'cpuset.sched_relax_domain_level' file allows you to request changing
this searching range as you like. This file takes int value which
-indicates size of searching range in levels ideally as follows,
+indicates size of searching range in levels approximately as follows,
otherwise initial value -1 that indicates the cpuset has no request.
====== ===========================================================
@@ -581,6 +581,11 @@ otherwise initial value -1 that indicates the cpuset has no request.
5 search system wide [on NUMA system]
====== ===========================================================
+Not all levels can be present and values can change depending on the
+system architecture and kernel configuration. Check
+/sys/kernel/debug/sched/domains/cpu*/domain*/ for system-specific
+details.
+
The system default is architecture dependent. The system default
can be changed using the relax_domain_level= boot parameter.
diff --git a/Documentation/admin-guide/cgroup-v1/freezer-subsystem.rst b/Documentation/admin-guide/cgroup-v1/freezer-subsystem.rst
index 582d3427de3f..a964aff373b1 100644
--- a/Documentation/admin-guide/cgroup-v1/freezer-subsystem.rst
+++ b/Documentation/admin-guide/cgroup-v1/freezer-subsystem.rst
@@ -125,3 +125,7 @@ to unfreeze all tasks in the container::
This is the basic mechanism which should do the right thing for user space task
in a simple scenario.
+
+This freezer implementation is affected by shortcomings (see commit
+76f969e8948d8 ("cgroup: cgroup v2 freezer")) and cgroup v2 freezer is
+recommended.
diff --git a/Documentation/admin-guide/cgroup-v1/hugetlb.rst b/Documentation/admin-guide/cgroup-v1/hugetlb.rst
index 0fa724d82abb..493a8e386700 100644
--- a/Documentation/admin-guide/cgroup-v1/hugetlb.rst
+++ b/Documentation/admin-guide/cgroup-v1/hugetlb.rst
@@ -65,10 +65,12 @@ files include::
1. Page fault accounting
-hugetlb.<hugepagesize>.limit_in_bytes
-hugetlb.<hugepagesize>.max_usage_in_bytes
-hugetlb.<hugepagesize>.usage_in_bytes
-hugetlb.<hugepagesize>.failcnt
+::
+
+ hugetlb.<hugepagesize>.limit_in_bytes
+ hugetlb.<hugepagesize>.max_usage_in_bytes
+ hugetlb.<hugepagesize>.usage_in_bytes
+ hugetlb.<hugepagesize>.failcnt
The HugeTLB controller allows users to limit the HugeTLB usage (page fault) per
control group and enforces the limit during page fault. Since HugeTLB
@@ -82,10 +84,12 @@ getting SIGBUS.
2. Reservation accounting
-hugetlb.<hugepagesize>.rsvd.limit_in_bytes
-hugetlb.<hugepagesize>.rsvd.max_usage_in_bytes
-hugetlb.<hugepagesize>.rsvd.usage_in_bytes
-hugetlb.<hugepagesize>.rsvd.failcnt
+::
+
+ hugetlb.<hugepagesize>.rsvd.limit_in_bytes
+ hugetlb.<hugepagesize>.rsvd.max_usage_in_bytes
+ hugetlb.<hugepagesize>.rsvd.usage_in_bytes
+ hugetlb.<hugepagesize>.rsvd.failcnt
The HugeTLB controller allows to limit the HugeTLB reservations per control
group and enforces the controller limit at reservation time and at the fault of
diff --git a/Documentation/admin-guide/cgroup-v1/memcg_test.rst b/Documentation/admin-guide/cgroup-v1/memcg_test.rst
index 1f128458ddea..9f8e27355cba 100644
--- a/Documentation/admin-guide/cgroup-v1/memcg_test.rst
+++ b/Documentation/admin-guide/cgroup-v1/memcg_test.rst
@@ -102,7 +102,7 @@ Under below explanation, we assume CONFIG_SWAP=y.
The logic is very clear. (About migration, see below)
Note:
- __remove_from_page_cache() is called by remove_from_page_cache()
+ __filemap_remove_folio() is called by filemap_remove_folio()
and __remove_mapping().
6. Shmem(tmpfs) Page Cache
diff --git a/Documentation/admin-guide/cgroup-v1/memory.rst b/Documentation/admin-guide/cgroup-v1/memory.rst
index ca7d9402f6be..d6b1db8cc7eb 100644
--- a/Documentation/admin-guide/cgroup-v1/memory.rst
+++ b/Documentation/admin-guide/cgroup-v1/memory.rst
@@ -78,18 +78,23 @@ Brief summary of control files.
memory.memsw.max_usage_in_bytes show max memory+Swap usage recorded
memory.soft_limit_in_bytes set/show soft limit of memory usage
This knob is not available on CONFIG_PREEMPT_RT systems.
+ This knob is deprecated and shouldn't be
+ used.
memory.stat show various statistics
memory.use_hierarchy set/show hierarchical account enabled
This knob is deprecated and shouldn't be
used.
memory.force_empty trigger forced page reclaim
memory.pressure_level set memory pressure notifications
+ This knob is deprecated and shouldn't be
+ used.
memory.swappiness set/show swappiness parameter of vmscan
(See sysctl's vm.swappiness)
- memory.move_charge_at_immigrate set/show controls of moving charges
+ Per memcg knob does not exist in cgroup v2.
+ memory.move_charge_at_immigrate This knob is deprecated.
+ memory.oom_control set/show oom controls.
This knob is deprecated and shouldn't be
used.
- memory.oom_control set/show oom controls.
memory.numa_stat show the number of memory usage per numa
node
memory.kmem.limit_in_bytes Deprecated knob to set and read the kernel
@@ -105,10 +110,18 @@ Brief summary of control files.
memory.kmem.max_usage_in_bytes show max kernel memory usage recorded
memory.kmem.tcp.limit_in_bytes set/show hard limit for tcp buf memory
+ This knob is deprecated and shouldn't be
+ used.
memory.kmem.tcp.usage_in_bytes show current tcp buf memory allocation
+ This knob is deprecated and shouldn't be
+ used.
memory.kmem.tcp.failcnt show the number of tcp buf memory usage
hits limits
+ This knob is deprecated and shouldn't be
+ used.
memory.kmem.tcp.max_usage_in_bytes show max tcp buf memory usage recorded
+ This knob is deprecated and shouldn't be
+ used.
==================================== ==========================================
1. History
@@ -229,10 +242,6 @@ behind this approach is that a cgroup that aggressively uses a shared
page will eventually get charged for it (once it is uncharged from
the cgroup that brought it in -- this will happen on memory pressure).
-But see :ref:`section 8.2 <cgroup-v1-memory-movable-charges>` when moving a
-task to another cgroup, its pages may be recharged to the new cgroup, if
-move_charge_at_immigrate has been chosen.
-
2.4 Swap Extension
--------------------------------------
@@ -300,14 +309,14 @@ When oom event notifier is registered, event will be delivered.
Lock order is as follows::
- Page lock (PG_locked bit of page->flags)
+ folio_lock
mm->page_table_lock or split pte_lock
folio_memcg_lock (memcg->move_lock)
mapping->i_pages lock
lruvec->lru_lock.
Per-node-per-memcgroup LRU (cgroup's private LRU) is guarded by
-lruvec->lru_lock; PG_lru bit of page->flags is cleared before
+lruvec->lru_lock; the folio LRU flag is cleared before
isolating a page from its LRU under lruvec->lru_lock.
.. _cgroup-v1-memory-kernel-extension:
@@ -601,6 +610,10 @@ memory.stat file includes following statistics:
'rss + mapped_file" will give you resident set size of cgroup.
+ Note that some kernel configurations might account complete larger
+ allocations (e.g., THP) towards 'rss' and 'mapped_file', even if
+ only some, but not all that memory is mapped.
+
(Note: file and shmem may be shared among other cgroups. In that case,
mapped_file is accounted only when the memory cgroup is owner of page
cache.)
@@ -693,8 +706,10 @@ For compatibility reasons writing 1 to memory.use_hierarchy will always pass::
# echo 1 > memory.use_hierarchy
-7. Soft limits
-==============
+7. Soft limits (DEPRECATED)
+===========================
+
+THIS IS DEPRECATED!
Soft limits allow for greater sharing of memory. The idea behind soft limits
is to allow control groups to use as much of the memory as needed, provided
@@ -740,78 +755,8 @@ If we want to change this to 1G, we can at any time use::
THIS IS DEPRECATED!
-It's expensive and unreliable! It's better practice to launch workload
-tasks directly from inside their target cgroup. Use dedicated workload
-cgroups to allow fine-grained policy adjustments without having to
-move physical pages between control domains.
-
-Users can move charges associated with a task along with task migration, that
-is, uncharge task's pages from the old cgroup and charge them to the new cgroup.
-This feature is not supported in !CONFIG_MMU environments because of lack of
-page tables.
-
-8.1 Interface
--------------
-
-This feature is disabled by default. It can be enabled (and disabled again) by
-writing to memory.move_charge_at_immigrate of the destination cgroup.
-
-If you want to enable it::
-
- # echo (some positive value) > memory.move_charge_at_immigrate
-
-.. note::
- Each bits of move_charge_at_immigrate has its own meaning about what type
- of charges should be moved. See :ref:`section 8.2
- <cgroup-v1-memory-movable-charges>` for details.
-
-.. note::
- Charges are moved only when you move mm->owner, in other words,
- a leader of a thread group.
-
-.. note::
- If we cannot find enough space for the task in the destination cgroup, we
- try to make space by reclaiming memory. Task migration may fail if we
- cannot make enough space.
-
-.. note::
- It can take several seconds if you move charges much.
-
-And if you want disable it again::
-
- # echo 0 > memory.move_charge_at_immigrate
-
-.. _cgroup-v1-memory-movable-charges:
-
-8.2 Type of charges which can be moved
---------------------------------------
-
-Each bit in move_charge_at_immigrate has its own meaning about what type of
-charges should be moved. But in any case, it must be noted that an account of
-a page or a swap can be moved only when it is charged to the task's current
-(old) memory cgroup.
-
-+---+--------------------------------------------------------------------------+
-|bit| what type of charges would be moved ? |
-+===+==========================================================================+
-| 0 | A charge of an anonymous page (or swap of it) used by the target task. |
-| | You must enable Swap Extension (see 2.4) to enable move of swap charges. |
-+---+--------------------------------------------------------------------------+
-| 1 | A charge of file pages (normal file, tmpfs file (e.g. ipc shared memory) |
-| | and swaps of tmpfs file) mmapped by the target task. Unlike the case of |
-| | anonymous pages, file pages (and swaps) in the range mmapped by the task |
-| | will be moved even if the task hasn't done page fault, i.e. they might |
-| | not be the task's "RSS", but other task's "RSS" that maps the same file. |
-| | And mapcount of the page is ignored (the page can be moved even if |
-| | page_mapcount(page) > 1). You must enable Swap Extension (see 2.4) to |
-| | enable move of swap charges. |
-+---+--------------------------------------------------------------------------+
-
-8.3 TODO
---------
-
-- All of moving charge operations are done under cgroup_mutex. It's not good
- behavior to hold the mutex too long, so we may need some trick.
+Reading memory.move_charge_at_immigrate will always return 0 and writing
+to it will always return -EINVAL.
9. Memory thresholds
====================
@@ -834,8 +779,10 @@ It's applicable for root and non-root cgroup.
.. _cgroup-v1-memory-oom-control:
-10. OOM Control
-===============
+10. OOM Control (DEPRECATED)
+============================
+
+THIS IS DEPRECATED!
memory.oom_control file is for OOM notification and other controls.
@@ -882,8 +829,10 @@ At reading, current status of OOM is shown.
The number of processes belonging to this cgroup killed by any
kind of OOM killer.
-11. Memory Pressure
-===================
+11. Memory Pressure (DEPRECATED)
+================================
+
+THIS IS DEPRECATED!
The pressure level notifications can be used to monitor the memory
allocation cost; based on the pressure, applications can implement
diff --git a/Documentation/admin-guide/cgroup-v1/pids.rst b/Documentation/admin-guide/cgroup-v1/pids.rst
index 6acebd9e72c8..0f9f9a7b1f6c 100644
--- a/Documentation/admin-guide/cgroup-v1/pids.rst
+++ b/Documentation/admin-guide/cgroup-v1/pids.rst
@@ -36,7 +36,8 @@ superset of parent/child/pids.current.
The pids.events file contains event counters:
- - max: Number of times fork failed because limit was hit.
+ - max: Number of times fork failed in the cgroup because limit was hit in
+ self or ancestors.
Example
-------
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 17e6e9565156..7f5b59d95fce 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -15,6 +15,9 @@ v1 is available under :ref:`Documentation/admin-guide/cgroup-v1/index.rst <cgrou
.. CONTENTS
+ [Whenever any new section is added to this document, please also add
+ an entry here.]
+
1. Introduction
1-1. Terminology
1-2. What is cgroup?
@@ -25,9 +28,10 @@ v1 is available under :ref:`Documentation/admin-guide/cgroup-v1/index.rst <cgrou
2-2-2. Threads
2-3. [Un]populated Notification
2-4. Controlling Controllers
- 2-4-1. Enabling and Disabling
- 2-4-2. Top-down Constraint
- 2-4-3. No Internal Process Constraint
+ 2-4-1. Availability
+ 2-4-2. Enabling and Disabling
+ 2-4-3. Top-down Constraint
+ 2-4-4. No Internal Process Constraint
2-5. Delegation
2-5-1. Model of Delegation
2-5-2. Delegation Containment
@@ -49,7 +53,8 @@ v1 is available under :ref:`Documentation/admin-guide/cgroup-v1/index.rst <cgrou
5-2. Memory
5-2-1. Memory Interface Files
5-2-2. Usage Guidelines
- 5-2-3. Memory Ownership
+ 5-2-3. Reclaim Protection
+ 5-2-4. Memory Ownership
5-3. IO
5-3-1. IO Interface Files
5-3-2. Writeback
@@ -61,16 +66,18 @@ v1 is available under :ref:`Documentation/admin-guide/cgroup-v1/index.rst <cgrou
5-4-1. PID Interface Files
5-5. Cpuset
5.5-1. Cpuset Interface Files
- 5-6. Device
+ 5-6. Device controller
5-7. RDMA
5-7-1. RDMA Interface Files
- 5-8. HugeTLB
- 5.8-1. HugeTLB Interface Files
- 5-9. Misc
- 5.9-1 Miscellaneous cgroup Interface Files
- 5.9-2 Migration and Ownership
- 5-10. Others
- 5-10-1. perf_event
+ 5-8. DMEM
+ 5-8-1. DMEM Interface Files
+ 5-9. HugeTLB
+ 5.9-1. HugeTLB Interface Files
+ 5-10. Misc
+ 5.10-1 Misc Interface Files
+ 5.10-2 Migration and Ownership
+ 5-11. Others
+ 5-11-1. perf_event
5-N. Non-normative information
5-N-1. CPU controller root cgroup process behaviour
5-N-2. IO controller root cgroup process behaviour
@@ -239,6 +246,13 @@ cgroup v2 currently supports the following mount options.
will not be tracked by the memory controller (even if cgroup
v2 is remounted later on).
+ pids_localevents
+ The option restores v1-like behavior of pids.events:max, that is only
+ local (inside cgroup proper) fork failures are counted. Without this
+ option pids.events.max represents any pids.max enforcemnt across
+ cgroup's subtree.
+
+
Organizing Processes and Threads
--------------------------------
@@ -427,6 +441,15 @@ both cgroups.
Controlling Controllers
-----------------------
+Availability
+~~~~~~~~~~~~
+
+A controller is available in a cgroup when it is supported by the kernel (i.e.,
+compiled in, not disabled and not attached to a v1 hierarchy) and listed in the
+"cgroup.controllers" file. Availability means the controller's interface files
+are exposed in the cgroup’s directory, allowing the distribution of the target
+resource to be observed or controlled within that cgroup.
+
Enabling and Disabling
~~~~~~~~~~~~~~~~~~~~~~
@@ -526,10 +549,12 @@ cgroup namespace on namespace creation.
Because the resource control interface files in a given directory
control the distribution of the parent's resources, the delegatee
shouldn't be allowed to write to them. For the first method, this is
-achieved by not granting access to these files. For the second, the
-kernel rejects writes to all files other than "cgroup.procs" and
-"cgroup.subtree_control" on a namespace root from inside the
-namespace.
+achieved by not granting access to these files. For the second, files
+outside the namespace should be hidden from the delegatee by the means
+of at least mount namespacing, and the kernel rejects writes to all
+files on a namespace root from inside the cgroup namespace, except for
+those files listed in "/sys/kernel/cgroup/delegate" (including
+"cgroup.procs", "cgroup.threads", "cgroup.subtree_control", etc.).
The end results are equivalent for both delegation types. Once
delegated, the user can build sub-hierarchy under the directory,
@@ -974,6 +999,32 @@ All cgroup core files are prefixed with "cgroup."
A dying cgroup can consume system resources not exceeding
limits, which were active at the moment of cgroup deletion.
+ nr_subsys_<cgroup_subsys>
+ Total number of live cgroup subsystems (e.g memory
+ cgroup) at and beneath the current cgroup.
+
+ nr_dying_subsys_<cgroup_subsys>
+ Total number of dying cgroup subsystems (e.g. memory
+ cgroup) at and beneath the current cgroup.
+
+ cgroup.stat.local
+ A read-only flat-keyed file which exists in non-root cgroups.
+ The following entry is defined:
+
+ frozen_usec
+ Cumulative time that this cgroup has spent between freezing and
+ thawing, regardless of whether by self or ancestor groups.
+ NB: (not) reaching "frozen" state is not accounted here.
+
+ Using the following ASCII representation of a cgroup's freezer
+ state, ::
+
+ 1 _____
+ frozen 0 __/ \__
+ ab cd
+
+ the duration being measured is the span between a and c.
+
cgroup.freeze
A read-write single value file which exists on non-root cgroups.
Allowed values are "0" and "1". The default is "0".
@@ -1058,30 +1109,53 @@ cpufreq governor about the minimum desired frequency which should always be
provided by a CPU, as well as the maximum desired frequency, which should not
be exceeded by a CPU.
-WARNING: cgroup2 doesn't yet support control of realtime processes and
-the cpu controller can only be enabled when all RT processes are in
-the root cgroup. Be aware that system management software may already
-have placed RT processes into nonroot cgroups during the system boot
-process, and these processes may need to be moved to the root cgroup
-before the cpu controller can be enabled.
+WARNING: cgroup2 cpu controller doesn't yet support the (bandwidth) control of
+realtime processes. For a kernel built with the CONFIG_RT_GROUP_SCHED option
+enabled for group scheduling of realtime processes, the cpu controller can only
+be enabled when all RT processes are in the root cgroup. Be aware that system
+management software may already have placed RT processes into non-root cgroups
+during the system boot process, and these processes may need to be moved to the
+root cgroup before the cpu controller can be enabled with a
+CONFIG_RT_GROUP_SCHED enabled kernel.
+
+With CONFIG_RT_GROUP_SCHED disabled, this limitation does not apply and some of
+the interface files either affect realtime processes or account for them. See
+the following section for details. Only the cpu controller is affected by
+CONFIG_RT_GROUP_SCHED. Other controllers can be used for the resource control of
+realtime processes irrespective of CONFIG_RT_GROUP_SCHED.
CPU Interface Files
~~~~~~~~~~~~~~~~~~~
-All time durations are in microseconds.
+The interaction of a process with the cpu controller depends on its scheduling
+policy and the underlying scheduler. From the point of view of the cpu controller,
+processes can be categorized as follows:
+
+* Processes under the fair-class scheduler
+* Processes under a BPF scheduler with the ``cgroup_set_weight`` callback
+* Everything else: ``SCHED_{FIFO,RR,DEADLINE}`` and processes under a BPF scheduler
+ without the ``cgroup_set_weight`` callback
+
+For details on when a process is under the fair-class scheduler or a BPF scheduler,
+check out :ref:`Documentation/scheduler/sched-ext.rst <sched-ext>`.
+
+For each of the following interface files, the above categories
+will be referred to. All time durations are in microseconds.
cpu.stat
A read-only flat-keyed file.
This file exists whether the controller is enabled or not.
- It always reports the following three stats:
+ It always reports the following three stats, which account for all the
+ processes in the cgroup:
- usage_usec
- user_usec
- system_usec
- and the following five when the controller is enabled:
+ and the following five when the controller is enabled, which account for
+ only the processes under the fair-class scheduler:
- nr_periods
- nr_throttled
@@ -1099,6 +1173,10 @@ All time durations are in microseconds.
If the cgroup has been configured to be SCHED_IDLE (cpu.idle = 1),
then the weight will show as a 0.
+ This file affects only processes under the fair-class scheduler and a BPF
+ scheduler with the ``cgroup_set_weight`` callback depending on what the
+ callback actually does.
+
cpu.weight.nice
A read-write single value file which exists on non-root
cgroups. The default is "0".
@@ -1111,6 +1189,10 @@ All time durations are in microseconds.
granularity is coarser for the nice values, the read value is
the closest approximation of the current weight.
+ This file affects only processes under the fair-class scheduler and a BPF
+ scheduler with the ``cgroup_set_weight`` callback depending on what the
+ callback actually does.
+
cpu.max
A read-write two value file which exists on non-root cgroups.
The default is "max 100000".
@@ -1123,43 +1205,55 @@ All time durations are in microseconds.
$PERIOD duration. "max" for $MAX indicates no limit. If only
one number is written, $MAX is updated.
+ This file affects only processes under the fair-class scheduler.
+
cpu.max.burst
A read-write single value file which exists on non-root
cgroups. The default is "0".
The burst in the range [0, $MAX].
+ This file affects only processes under the fair-class scheduler.
+
cpu.pressure
A read-write nested-keyed file.
Shows pressure stall information for CPU. See
:ref:`Documentation/accounting/psi.rst <psi>` for details.
+ This file accounts for all the processes in the cgroup.
+
cpu.uclamp.min
- A read-write single value file which exists on non-root cgroups.
- The default is "0", i.e. no utilization boosting.
+ A read-write single value file which exists on non-root cgroups.
+ The default is "0", i.e. no utilization boosting.
- The requested minimum utilization (protection) as a percentage
- rational number, e.g. 12.34 for 12.34%.
+ The requested minimum utilization (protection) as a percentage
+ rational number, e.g. 12.34 for 12.34%.
- This interface allows reading and setting minimum utilization clamp
- values similar to the sched_setattr(2). This minimum utilization
- value is used to clamp the task specific minimum utilization clamp.
+ This interface allows reading and setting minimum utilization clamp
+ values similar to the sched_setattr(2). This minimum utilization
+ value is used to clamp the task specific minimum utilization clamp,
+ including those of realtime processes.
- The requested minimum utilization (protection) is always capped by
- the current value for the maximum utilization (limit), i.e.
- `cpu.uclamp.max`.
+ The requested minimum utilization (protection) is always capped by
+ the current value for the maximum utilization (limit), i.e.
+ `cpu.uclamp.max`.
+
+ This file affects all the processes in the cgroup.
cpu.uclamp.max
- A read-write single value file which exists on non-root cgroups.
- The default is "max". i.e. no utilization capping
+ A read-write single value file which exists on non-root cgroups.
+ The default is "max". i.e. no utilization capping
+
+ The requested maximum utilization (limit) as a percentage rational
+ number, e.g. 98.76 for 98.76%.
- The requested maximum utilization (limit) as a percentage rational
- number, e.g. 98.76 for 98.76%.
+ This interface allows reading and setting maximum utilization clamp
+ values similar to the sched_setattr(2). This maximum utilization
+ value is used to clamp the task specific maximum utilization clamp,
+ including those of realtime processes.
- This interface allows reading and setting maximum utilization clamp
- values similar to the sched_setattr(2). This maximum utilization
- value is used to clamp the task specific maximum utilization clamp.
+ This file affects all the processes in the cgroup.
cpu.idle
A read-write single value file which exists on non-root cgroups.
@@ -1171,7 +1265,7 @@ All time durations are in microseconds.
own relative priorities, but the cgroup itself will be treated as
very low priority relative to its peers.
-
+ This file affects only processes under the fair-class scheduler.
Memory
------
@@ -1224,7 +1318,7 @@ PAGE_SIZE multiple when read back.
smaller overages.
Effective min boundary is limited by memory.min values of
- all ancestor cgroups. If there is memory.min overcommitment
+ ancestor cgroups. If there is memory.min overcommitment
(child cgroup or cgroups are requiring more protected memory
than parent will allow), then each child cgroup will get
the part of parent's protection proportional to its
@@ -1233,9 +1327,6 @@ PAGE_SIZE multiple when read back.
Putting more memory than generally available under this
protection is discouraged and may lead to constant OOMs.
- If a memory cgroup is not populated with processes,
- its memory.min is ignored.
-
memory.low
A read-write single value file which exists on non-root
cgroups. The default is "0".
@@ -1250,7 +1341,7 @@ PAGE_SIZE multiple when read back.
smaller overages.
Effective low boundary is limited by memory.low values of
- all ancestor cgroups. If there is memory.low overcommitment
+ ancestor cgroups. If there is memory.low overcommitment
(child cgroup or cgroups are requiring more protected memory
than parent will allow), then each child cgroup will get
the part of parent's protection proportional to its
@@ -1273,6 +1364,18 @@ PAGE_SIZE multiple when read back.
monitors the limited cgroup to alleviate heavy reclaim
pressure.
+ If memory.high is opened with O_NONBLOCK then the synchronous
+ reclaim is bypassed. This is useful for admin processes that
+ need to dynamically adjust the job's memory limits without
+ expending their own CPU resources on memory reclamation. The
+ job will trigger the reclaim and/or get throttled on its
+ next charge request.
+
+ Please note that with O_NONBLOCK, there is a chance that the
+ target memory cgroup may take indefinite amount of time to
+ reduce usage below the limit due to delayed charge request or
+ busy-hitting its memory to slow down reclaim.
+
memory.max
A read-write single value file which exists on non-root
cgroups. The default is "max".
@@ -1290,23 +1393,28 @@ PAGE_SIZE multiple when read back.
Caller could retry them differently, return into userspace
as -ENOMEM or silently ignore in cases like disk readahead.
+ If memory.max is opened with O_NONBLOCK, then the synchronous
+ reclaim and oom-kill are bypassed. This is useful for admin
+ processes that need to dynamically adjust the job's memory limits
+ without expending their own CPU resources on memory reclamation.
+ The job will trigger the reclaim and/or oom-kill on its next
+ charge request.
+
+ Please note that with O_NONBLOCK, there is a chance that the
+ target memory cgroup may take indefinite amount of time to
+ reduce usage below the limit due to delayed charge request or
+ busy-hitting its memory to slow down reclaim.
+
memory.reclaim
A write-only nested-keyed file which exists for all cgroups.
This is a simple interface to trigger memory reclaim in the
target cgroup.
- This file accepts a single key, the number of bytes to reclaim.
- No nested keys are currently supported.
-
Example::
echo "1G" > memory.reclaim
- The interface can be later extended with nested keys to
- configure the reclaim behavior. For example, specify the
- type of memory to reclaim from (anon, file, ..).
-
Please note that the kernel can over or under reclaim from
the target cgroup. If less bytes are reclaimed than the
specified amount, -EAGAIN is returned.
@@ -1318,12 +1426,29 @@ PAGE_SIZE multiple when read back.
This means that the networking layer will not adapt based on
reclaim induced by memory.reclaim.
+The following nested keys are defined.
+
+ ========== ================================
+ swappiness Swappiness value to reclaim with
+ ========== ================================
+
+ Specifying a swappiness value instructs the kernel to perform
+ the reclaim with that swappiness value. Note that this has the
+ same semantics as vm.swappiness applied to memcg reclaim with
+ all the existing limitations and potential future extensions.
+
+ The valid range for swappiness is [0-200, max], setting
+ swappiness=max exclusively reclaims anonymous memory.
+
memory.peak
- A read-only single value file which exists on non-root
- cgroups.
+ A read-write single value file which exists on non-root cgroups.
- The max memory usage recorded for the cgroup and its
- descendants since the creation of the cgroup.
+ The max memory usage recorded for the cgroup and its descendants since
+ either the creation of the cgroup or the most recent reset for that FD.
+
+ A write of any non-empty string to this file resets it to the
+ current memory usage for subsequent reads through the same
+ file descriptor.
memory.oom.group
A read-write single value file which exists on non-root
@@ -1388,6 +1513,10 @@ PAGE_SIZE multiple when read back.
oom_group_kill
The number of times a group OOM has occurred.
+ sock_throttled
+ The number of times network sockets associated with
+ this cgroup are throttled.
+
memory.events.local
Similar to memory.events but the fields in the file are local
to the cgroup i.e. not hierarchical. The file modified event
@@ -1412,7 +1541,10 @@ PAGE_SIZE multiple when read back.
anon
Amount of memory used in anonymous mappings such as
- brk(), sbrk(), and mmap(MAP_ANONYMOUS)
+ brk(), sbrk(), and mmap(MAP_ANONYMOUS). Note that
+ some kernel configurations might account complete larger
+ allocations (e.g., THP) if only some, but not all the
+ memory of such an allocation is mapped anymore.
file
Amount of memory used to cache filesystem data,
@@ -1432,7 +1564,7 @@ PAGE_SIZE multiple when read back.
sec_pagetables
Amount of memory allocated for secondary page tables,
this currently includes KVM mmu allocations on x86
- and arm64.
+ and arm64 and IOMMU page tables.
percpu (npn)
Amount of memory used for storing per-cpu kernel
@@ -1455,7 +1587,10 @@ PAGE_SIZE multiple when read back.
Amount of application memory swapped out to zswap.
file_mapped
- Amount of cached filesystem data mapped with mmap()
+ Amount of cached filesystem data mapped with mmap(). Note
+ that some kernel configurations might account complete
+ larger allocations (e.g., THP) if only some, but not
+ not all the memory of such an allocation is mapped.
file_dirty
Amount of cached filesystem data that was modified but
@@ -1527,6 +1662,12 @@ PAGE_SIZE multiple when read back.
workingset_nodereclaim
Number of times a shadow node has been reclaimed
+ pswpin (npn)
+ Number of pages swapped into memory
+
+ pswpout (npn)
+ Number of pages swapped out of memory
+
pgscan (npn)
Amount of scanned pages (in an inactive LRU list)
@@ -1542,6 +1683,9 @@ PAGE_SIZE multiple when read back.
pgscan_khugepaged (npn)
Amount of scanned pages by khugepaged (in an inactive LRU list)
+ pgscan_proactive (npn)
+ Amount of scanned pages proactively (in an inactive LRU list)
+
pgsteal_kswapd (npn)
Amount of reclaimed pages by kswapd
@@ -1551,6 +1695,9 @@ PAGE_SIZE multiple when read back.
pgsteal_khugepaged (npn)
Amount of reclaimed pages by khugepaged
+ pgsteal_proactive (npn)
+ Amount of reclaimed pages proactively
+
pgfault (npn)
Total number of page faults incurred
@@ -1572,6 +1719,24 @@ PAGE_SIZE multiple when read back.
pglazyfreed (npn)
Amount of reclaimed lazyfree pages
+ swpin_zero
+ Number of pages swapped into memory and filled with zero, where I/O
+ was optimized out because the page content was detected to be zero
+ during swapout.
+
+ swpout_zero
+ Number of zero-filled pages swapped out with I/O skipped due to the
+ content being detected as zero.
+
+ zswpin
+ Number of pages moved in to memory from zswap.
+
+ zswpout
+ Number of pages moved out of memory to zswap.
+
+ zswpwb
+ Number of pages written from zswap to swap.
+
thp_fault_alloc (npn)
Number of transparent hugepages which were allocated to satisfy
a page fault. This counter is not present when CONFIG_TRANSPARENT_HUGEPAGE
@@ -1591,6 +1756,33 @@ PAGE_SIZE multiple when read back.
Usually because failed to allocate some continuous swap space
for the huge page.
+ numa_pages_migrated (npn)
+ Number of pages migrated by NUMA balancing.
+
+ numa_pte_updates (npn)
+ Number of pages whose page table entries are modified by
+ NUMA balancing to produce NUMA hinting faults on access.
+
+ numa_hint_faults (npn)
+ Number of NUMA hinting faults.
+
+ pgdemote_kswapd
+ Number of pages demoted by kswapd.
+
+ pgdemote_direct
+ Number of pages demoted directly.
+
+ pgdemote_khugepaged
+ Number of pages demoted by khugepaged.
+
+ pgdemote_proactive
+ Number of pages demoted by proactively.
+
+ hugetlb
+ Amount of memory used by hugetlb pages. This metric only shows
+ up if hugetlb usage is accounted for in memory.current (i.e.
+ cgroup is mounted with the memory_hugetlb_accounting option).
+
memory.numa_stat
A read-only nested-keyed file which exists on non-root cgroups.
@@ -1640,11 +1832,14 @@ PAGE_SIZE multiple when read back.
Healthy workloads are not expected to reach this limit.
memory.swap.peak
- A read-only single value file which exists on non-root
- cgroups.
+ A read-write single value file which exists on non-root cgroups.
- The max swap usage recorded for the cgroup and its
- descendants since the creation of the cgroup.
+ The max swap usage recorded for the cgroup and its descendants since
+ the creation of the cgroup or the most recent reset for that FD.
+
+ A write of any non-empty string to this file resets it to the
+ current memory usage for subsequent reads through the same
+ file descriptor.
memory.swap.max
A read-write single value file which exists on non-root
@@ -1694,9 +1889,10 @@ PAGE_SIZE multiple when read back.
entries fault back in or are written out to disk.
memory.zswap.writeback
- A read-write single value file. The default value is "1". The
- initial value of the root cgroup is 1, and when a new cgroup is
- created, it inherits the current value of its parent.
+ A read-write single value file. The default value is "1".
+ Note that this setting is hierarchical, i.e. the writeback would be
+ implicitly disabled for child cgroups if the upper hierarchy
+ does so.
When this is set to 0, all swapping attempts to swapping devices
are disabled. This included both zswap writebacks, and swapping due
@@ -1707,6 +1903,8 @@ PAGE_SIZE multiple when read back.
Note that this is subtly different from setting memory.swap.max to
0, as it still allows for pages to be written to the zswap pool.
+ This setting has no effect if zswap is disabled, and swapping
+ is allowed unless memory.swap.max is set to 0.
memory.pressure
A read-only nested-keyed file.
@@ -1738,6 +1936,27 @@ memory - is necessary to determine whether a workload needs more
memory; unfortunately, memory pressure monitoring mechanism isn't
implemented yet.
+Reclaim Protection
+~~~~~~~~~~~~~~~~~~
+
+The protection configured with "memory.low" or "memory.min" applies relatively
+to the target of the reclaim (i.e. any of memory cgroup limits, proactive
+memory.reclaim or global reclaim apparently located in the root cgroup).
+The protection value configured for B applies unchanged to the reclaim
+targeting A (i.e. caused by competition with the sibling E)::
+
+ root - ... - A - B - C
+ \ ` D
+ ` E
+
+When the reclaim targets ancestors of A, the effective protection of B is
+capped by the protection value configured for A (and any other intermediate
+ancestors between A and the target).
+
+To express indifference about relative sibling protection, it is suggested to
+use memory_recursiveprot. Configuring all descendants of a parent with finite
+protection to "max" works but it may unnecessarily skew memory.events:low
+field.
Memory Ownership
~~~~~~~~~~~~~~~~
@@ -2181,11 +2400,31 @@ PID Interface Files
Hard limit of number of processes.
pids.current
- A read-only single value file which exists on all cgroups.
+ A read-only single value file which exists on non-root cgroups.
The number of processes currently in the cgroup and its
descendants.
+ pids.peak
+ A read-only single value file which exists on non-root cgroups.
+
+ The maximum value that the number of processes in the cgroup and its
+ descendants has ever reached.
+
+ pids.events
+ A read-only flat-keyed file which exists on non-root cgroups. Unless
+ specified otherwise, a value change in this file generates a file
+ modified event. The following entries are defined.
+
+ max
+ The number of times the cgroup's total number of processes hit the pids.max
+ limit (see also pids_localevents).
+
+ pids.events.local
+ Similar to pids.events but the fields in the file are local
+ to the cgroup i.e. not hierarchical. The file modified event
+ generated on this file reflects only the local events.
+
Organisational operations are not blocked by cgroup policies, so it is
possible to have pids.current > pids.max. This can be done by either
setting the limit to be smaller than pids.current, or attaching enough
@@ -2320,8 +2559,12 @@ Cpuset Interface Files
is always a subset of it.
Users can manually set it to a value that is different from
- "cpuset.cpus". The only constraint in setting it is that the
- list of CPUs must be exclusive with respect to its sibling.
+ "cpuset.cpus". One constraint in setting it is that the list of
+ CPUs must be exclusive with respect to "cpuset.cpus.exclusive"
+ of its sibling. If "cpuset.cpus.exclusive" of a sibling cgroup
+ isn't set, its "cpuset.cpus" value, if set, cannot be a subset
+ of it to leave at least one CPU available when the exclusive
+ CPUs are taken away.
For a parent cgroup, any one of its exclusive CPUs can only
be distributed to at most one of its child cgroups. Having an
@@ -2337,8 +2580,8 @@ Cpuset Interface Files
cpuset-enabled cgroups.
This file shows the effective set of exclusive CPUs that
- can be used to create a partition root. The content of this
- file will always be a subset of "cpuset.cpus" and its parent's
+ can be used to create a partition root. The content
+ of this file will always be a subset of its parent's
"cpuset.cpus.exclusive.effective" if its parent is not the root
cgroup. It will also be a subset of "cpuset.cpus.exclusive"
if it is set. If "cpuset.cpus.exclusive" is not set, it is
@@ -2527,6 +2770,49 @@ RDMA Interface Files
mlx4_0 hca_handle=1 hca_object=20
ocrdma1 hca_handle=1 hca_object=23
+DMEM
+----
+
+The "dmem" controller regulates the distribution and accounting of
+device memory regions. Because each memory region may have its own page size,
+which does not have to be equal to the system page size, the units are always bytes.
+
+DMEM Interface Files
+~~~~~~~~~~~~~~~~~~~~
+
+ dmem.max, dmem.min, dmem.low
+ A readwrite nested-keyed file that exists for all the cgroups
+ except root that describes current configured resource limit
+ for a region.
+
+ An example for xe follows::
+
+ drm/0000:03:00.0/vram0 1073741824
+ drm/0000:03:00.0/stolen max
+
+ The semantics are the same as for the memory cgroup controller, and are
+ calculated in the same way.
+
+ dmem.capacity
+ A read-only file that describes maximum region capacity.
+ It only exists on the root cgroup. Not all memory can be
+ allocated by cgroups, as the kernel reserves some for
+ internal use.
+
+ An example for xe follows::
+
+ drm/0000:03:00.0/vram0 8514437120
+ drm/0000:03:00.0/stolen 67108864
+
+ dmem.current
+ A read-only file that describes current resource usage.
+ It exists for all the cgroup except root.
+
+ An example for xe follows::
+
+ drm/0000:03:00.0/vram0 12550144
+ drm/0000:03:00.0/stolen 8650752
+
HugeTLB
-------
@@ -2599,6 +2885,15 @@ Miscellaneous controller provides 3 interface files. If two misc resources (res_
res_a 3
res_b 0
+ misc.peak
+ A read-only flat-keyed file shown in all cgroups. It shows the
+ historical maximum usage of the resources in the cgroup and its
+ children.::
+
+ $ cat misc.peak
+ res_a 10
+ res_b 8
+
misc.max
A read-write flat-keyed file shown in the non root cgroups. Allowed
maximum usage of the resources in the cgroup and its children.::
@@ -2628,6 +2923,11 @@ Miscellaneous controller provides 3 interface files. If two misc resources (res_
The number of times the cgroup's resource usage was
about to go over the max boundary.
+ misc.events.local
+ Similar to misc.events but the fields in the file are local to the
+ cgroup i.e. not hierarchical. The file modified event generated on
+ this file reflects only the local events.
+
Migration and Ownership
~~~~~~~~~~~~~~~~~~~~~~~
@@ -2836,7 +3136,7 @@ Filesystem Support for Writeback
--------------------------------
A filesystem can support cgroup writeback by updating
-address_space_operations->writepage[s]() to annotate bio's using the
+address_space_operations->writepages() to annotate bio's using the
following two functions.
wbc_init_bio(@wbc, @bio)
@@ -2846,7 +3146,7 @@ following two functions.
a queue (device) has been associated with the bio and
before submission.
- wbc_account_cgroup_owner(@wbc, @page, @bytes)
+ wbc_account_cgroup_owner(@wbc, @folio, @bytes)
Should be called for each data segment being written out.
While this function doesn't care exactly when it's called
during the writeback session, it's the easiest and most
@@ -2878,8 +3178,8 @@ Deprecated v1 Core Features
- "cgroup.clone_children" is removed.
-- /proc/cgroups is meaningless for v2. Use "cgroup.controllers" file
- at the root instead.
+- /proc/cgroups is meaningless for v2. Use "cgroup.controllers" or
+ "cgroup.stat" files at the root instead.
Issues with v1 and Rationales for v2
diff --git a/Documentation/admin-guide/cifs/introduction.rst b/Documentation/admin-guide/cifs/introduction.rst
index 53ea62906aa5..ffc6e2564dd5 100644
--- a/Documentation/admin-guide/cifs/introduction.rst
+++ b/Documentation/admin-guide/cifs/introduction.rst
@@ -28,7 +28,7 @@ Introduction
high performance safe distributed caching (leases/oplocks), optional packet
signing, large files, Unicode support and other internationalization
improvements. Since both Samba server and this filesystem client support the
- CIFS Unix extensions, and the Linux client also suppors SMB3 POSIX extensions,
+ CIFS Unix extensions, and the Linux client also supports SMB3 POSIX extensions,
the combination can provide a reasonable alternative to other network and
cluster file systems for fileserving in some Linux to Linux environments,
not just in Linux to Windows (or Linux to Mac) environments.
diff --git a/Documentation/admin-guide/cifs/usage.rst b/Documentation/admin-guide/cifs/usage.rst
index aa8290a29dc8..d989ae5778ba 100644
--- a/Documentation/admin-guide/cifs/usage.rst
+++ b/Documentation/admin-guide/cifs/usage.rst
@@ -270,6 +270,8 @@ configured for Unix Extensions (and the client has not disabled
illegal Windows/NTFS/SMB characters to a remap range (this mount parameter
is the default for SMB3). This remap (``mapposix``) range is also
compatible with Mac (and "Services for Mac" on some older Windows).
+When POSIX Extensions for SMB 3.1.1 are negotiated, remapping is automatically
+disabled.
CIFS VFS Mount Options
======================
@@ -723,40 +725,26 @@ Configuration pseudo-files:
======================= =======================================================
SecurityFlags Flags which control security negotiation and
also packet signing. Authentication (may/must)
- flags (e.g. for NTLM and/or NTLMv2) may be combined with
+ flags (e.g. for NTLMv2) may be combined with
the signing flags. Specifying two different password
hashing mechanisms (as "must use") on the other hand
does not make much sense. Default flags are::
- 0x07007
-
- (NTLM, NTLMv2 and packet signing allowed). The maximum
- allowable flags if you want to allow mounts to servers
- using weaker password hashes is 0x37037 (lanman,
- plaintext, ntlm, ntlmv2, signing allowed). Some
- SecurityFlags require the corresponding menuconfig
- options to be enabled. Enabling plaintext
- authentication currently requires also enabling
- lanman authentication in the security flags
- because the cifs module only supports sending
- laintext passwords using the older lanman dialect
- form of the session setup SMB. (e.g. for authentication
- using plain text passwords, set the SecurityFlags
- to 0x30030)::
+ 0x00C5
+
+ (NTLMv2 and packet signing allowed). Some SecurityFlags
+ may require enabling a corresponding menuconfig option.
may use packet signing 0x00001
must use packet signing 0x01001
- may use NTLM (most common password hash) 0x00002
- must use NTLM 0x02002
may use NTLMv2 0x00004
must use NTLMv2 0x04004
- may use Kerberos security 0x00008
- must use Kerberos 0x08008
- may use lanman (weak) password hash 0x00010
- must use lanman password hash 0x10010
- may use plaintext passwords 0x00020
- must use plaintext passwords 0x20020
- (reserved for future packet encryption) 0x00040
+ may use Kerberos security (krb5) 0x00008
+ must use Kerberos 0x08008
+ may use NTLMSSP 0x00080
+ must use NTLMSSP 0x80080
+ seal (packet encryption) 0x00040
+ must seal 0x40040
cifsFYI If set to non-zero value, additional debug information
will be logged to the system error log. This field
diff --git a/Documentation/admin-guide/device-mapper/delay.rst b/Documentation/admin-guide/device-mapper/delay.rst
index 917ba8c33359..a1e673c0e782 100644
--- a/Documentation/admin-guide/device-mapper/delay.rst
+++ b/Documentation/admin-guide/device-mapper/delay.rst
@@ -3,29 +3,52 @@ dm-delay
========
Device-Mapper's "delay" target delays reads and/or writes
-and maps them to different devices.
+and/or flushes and optionally maps them to different devices.
-Parameters::
+Arguments::
<device> <offset> <delay> [<write_device> <write_offset> <write_delay>
[<flush_device> <flush_offset> <flush_delay>]]
-With separate write parameters, the first set is only used for reads.
+Table line has to either have 3, 6 or 9 arguments:
+
+3: apply offset and delay to read, write and flush operations on device
+
+6: apply offset and delay to device, also apply write_offset and write_delay
+ to write and flush operations on optionally different write_device with
+ optionally different sector offset
+
+9: same as 6 arguments plus define flush_offset and flush_delay explicitly
+ on/with optionally different flush_device/flush_offset.
+
Offsets are specified in sectors.
+
Delays are specified in milliseconds.
+
Example scripts
===============
::
-
#!/bin/sh
- # Create device delaying rw operation for 500ms
- echo "0 `blockdev --getsz $1` delay $1 0 500" | dmsetup create delayed
+ #
+ # Create mapped device named "delayed" delaying read, write and flush operations for 500ms.
+ #
+ dmsetup create delayed --table "0 `blockdev --getsz $1` delay $1 0 500"
::
+ #!/bin/sh
+ #
+ # Create mapped device delaying write and flush operations for 400ms and
+ # splitting reads to device $1 but writes and flushes to different device $2
+ # to different offsets of 2048 and 4096 sectors respectively.
+ #
+ dmsetup create delayed --table "0 `blockdev --getsz $1` delay $1 2048 0 $2 4096 400"
+::
#!/bin/sh
- # Create device delaying only write operation for 500ms and
- # splitting reads and writes to different devices $1 $2
- echo "0 `blockdev --getsz $1` delay $1 0 0 $2 0 500" | dmsetup create delayed
+ #
+ # Create mapped device delaying reads for 50ms, writes for 100ms and flushes for 333ms
+ # onto the same backing device at offset 0 sectors.
+ #
+ dmsetup create delayed --table "0 `blockdev --getsz $1` delay $1 0 50 $2 0 100 $1 0 333"
diff --git a/Documentation/admin-guide/device-mapper/dm-crypt.rst b/Documentation/admin-guide/device-mapper/dm-crypt.rst
index aa2d04d95df6..4467f6d4b632 100644
--- a/Documentation/admin-guide/device-mapper/dm-crypt.rst
+++ b/Documentation/admin-guide/device-mapper/dm-crypt.rst
@@ -113,6 +113,11 @@ same_cpu_crypt
The default is to use an unbound workqueue so that encryption work
is automatically balanced between available CPUs.
+high_priority
+ Set dm-crypt workqueues and the writer thread to high priority. This
+ improves throughput and latency of dm-crypt while degrading general
+ responsiveness of the system.
+
submit_from_crypt_cpus
Disable offloading writes to a separate thread after encryption.
There are some situations where offloading write bios from the
@@ -141,6 +146,11 @@ integrity:<bytes>:<type>
integrity for the encrypted device. The additional space is then
used for storing authentication tag (and persistent IV if needed).
+integrity_key_size:<bytes>
+ Optionally set the integrity key size if it differs from the digest size.
+ It allows the use of wrapped key algorithms where the key size is
+ independent of the cryptographic key size.
+
sector_size:<bytes>
Use <bytes> as the encryption unit instead of 512 bytes sectors.
This option can be in range 512 - 4096 bytes and must be power of two.
@@ -155,6 +165,27 @@ iv_large_sectors
The <iv_offset> must be multiple of <sector_size> (in 512 bytes units)
if this flag is specified.
+integrity_key_size:<bytes>
+ Use an integrity key of <bytes> size instead of using an integrity key size
+ of the digest size of the used HMAC algorithm.
+
+
+Module parameters::
+ max_read_size
+ Maximum size of read requests. When a request larger than this size
+ is received, dm-crypt will split the request. The splitting improves
+ concurrency (the split requests could be encrypted in parallel by multiple
+ cores), but it also causes overhead. The user should tune this parameters to
+ fit the actual workload.
+
+ max_write_size
+ Maximum size of write requests. When a request larger than this size
+ is received, dm-crypt will split the request. The splitting improves
+ concurrency (the split requests could be encrypted in parallel by multiple
+ cores), but it also causes overhead. The user should tune this parameters to
+ fit the actual workload.
+
+
Example scripts
===============
LUKS (Linux Unified Key Setup) is now the preferred way to set up disk
diff --git a/Documentation/admin-guide/device-mapper/dm-integrity.rst b/Documentation/admin-guide/device-mapper/dm-integrity.rst
index d8a5f14d0e3c..c2e18ecc065c 100644
--- a/Documentation/admin-guide/device-mapper/dm-integrity.rst
+++ b/Documentation/admin-guide/device-mapper/dm-integrity.rst
@@ -92,6 +92,11 @@ Target arguments:
allowed. This mode is useful for data recovery if the
device cannot be activated in any of the other standard
modes.
+ I - inline mode - in this mode, dm-integrity will store integrity
+ data directly in the underlying device sectors.
+ The underlying device must have an integrity profile that
+ allows storing user integrity data and provides enough
+ space for the selected integrity tag.
5. the number of additional arguments
diff --git a/Documentation/admin-guide/device-mapper/dm-pcache.rst b/Documentation/admin-guide/device-mapper/dm-pcache.rst
new file mode 100644
index 000000000000..09d327ef4b14
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/dm-pcache.rst
@@ -0,0 +1,202 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=================================
+dm-pcache — Persistent Cache
+=================================
+
+*Author: Dongsheng Yang <dongsheng.yang@linux.dev>*
+
+This document describes *dm-pcache*, a Device-Mapper target that lets a
+byte-addressable *DAX* (persistent-memory, “pmem”) region act as a
+high-performance, crash-persistent cache in front of a slower block
+device. The code lives in `drivers/md/dm-pcache/`.
+
+Quick feature summary
+=====================
+
+* *Write-back* caching (only mode currently supported).
+* *16 MiB segments* allocated on the pmem device.
+* *Data CRC32* verification (optional, per cache).
+* Crash-safe: every metadata structure is duplicated (`PCACHE_META_INDEX_MAX
+ == 2`) and protected with CRC+sequence numbers.
+* *Multi-tree indexing* (indexing trees sharded by logical address) for high PMem parallelism
+* Pure *DAX path* I/O – no extra BIO round-trips
+* *Log-structured write-back* that preserves backend crash-consistency
+
+
+Constructor
+===========
+
+::
+
+ pcache <cache_dev> <backing_dev> [<number_of_optional_arguments> <cache_mode writeback> <data_crc true|false>]
+
+========================= ====================================================
+``cache_dev`` Any DAX-capable block device (``/dev/pmem0``…).
+ All metadata *and* cached blocks are stored here.
+
+``backing_dev`` The slow block device to be cached.
+
+``cache_mode`` Optional, Only ``writeback`` is accepted at the
+ moment.
+
+``data_crc`` Optional, default to ``false``
+
+ * ``true`` – store CRC32 for every cached entry
+ and verify on reads
+ * ``false`` – skip CRC (faster)
+========================= ====================================================
+
+Example
+-------
+
+.. code-block:: shell
+
+ dmsetup create pcache_sdb --table \
+ "0 $(blockdev --getsz /dev/sdb) pcache /dev/pmem0 /dev/sdb 4 cache_mode writeback data_crc true"
+
+The first time a pmem device is used, dm-pcache formats it automatically
+(super-block, cache_info, etc.).
+
+
+Status line
+===========
+
+``dmsetup status <device>`` (``STATUSTYPE_INFO``) prints:
+
+::
+
+ <sb_flags> <seg_total> <cache_segs> <segs_used> \
+ <gc_percent> <cache_flags> \
+ <key_head_seg>:<key_head_off> \
+ <dirty_tail_seg>:<dirty_tail_off> \
+ <key_tail_seg>:<key_tail_off>
+
+Field meanings
+--------------
+
+=============================== =============================================
+``sb_flags`` Super-block flags (e.g. endian marker).
+
+``seg_total`` Number of physical *pmem* segments.
+
+``cache_segs`` Number of segments used for cache.
+
+``segs_used`` Segments currently allocated (bitmap weight).
+
+``gc_percent`` Current GC high-water mark (0-90).
+
+``cache_flags`` Bit 0 – DATA_CRC enabled
+ Bit 1 – INIT_DONE (cache initialised)
+ Bits 2-5 – cache mode (0 == WB).
+
+``key_head`` Where new key-sets are being written.
+
+``dirty_tail`` First dirty key-set that still needs
+ write-back to the backing device.
+
+``key_tail`` First key-set that may be reclaimed by GC.
+=============================== =============================================
+
+
+Messages
+========
+
+*Change GC trigger*
+
+::
+
+ dmsetup message <dev> 0 gc_percent <0-90>
+
+
+Theory of operation
+===================
+
+Sub-devices
+-----------
+
+==================== =========================================================
+backing_dev Any block device (SSD/HDD/loop/LVM, etc.).
+cache_dev DAX device; must expose direct-access memory.
+==================== =========================================================
+
+Segments and key-sets
+---------------------
+
+* The pmem space is divided into *16 MiB segments*.
+* Each write allocates space from a per-CPU *data_head* inside a segment.
+* A *cache-key* records a logical range on the origin and where it lives
+ inside pmem (segment + offset + generation).
+* 128 keys form a *key-set* (kset); ksets are written sequentially in pmem
+ and are themselves crash-safe (CRC).
+* The pair *(key_tail, dirty_tail)* delimit clean/dirty and live/dead ksets.
+
+Write-back
+----------
+
+Dirty keys are queued into a tree; a background worker copies data
+back to the backing_dev and advances *dirty_tail*. A FLUSH/FUA bio from the
+upper layers forces an immediate metadata commit.
+
+Garbage collection
+------------------
+
+GC starts when ``segs_used >= seg_total * gc_percent / 100``. It walks
+from *key_tail*, frees segments whose every key has been invalidated, and
+advances *key_tail*.
+
+CRC verification
+----------------
+
+If ``data_crc is enabled`` dm-pcache computes a CRC32 over every cached data
+range when it is inserted and stores it in the on-media key. Reads
+validate the CRC before copying to the caller.
+
+
+Failure handling
+================
+
+* *pmem media errors* – all metadata copies are read with
+ ``copy_mc_to_kernel``; an uncorrectable error logs and aborts initialisation.
+* *Cache full* – if no free segment can be found, writes return ``-EBUSY``;
+ dm-pcache retries internally (request deferral).
+* *System crash* – on attach, the driver replays ksets from *key_tail* to
+ rebuild the in-core trees; every segment’s generation guards against
+ use-after-free keys.
+
+
+Limitations & TODO
+==================
+
+* Only *write-back* mode; other modes planned.
+* Only FIFO cache invalidate; other (LRU, ARC...) planned.
+* Table reload is not supported currently.
+* Discard planned.
+
+
+Example workflow
+================
+
+.. code-block:: shell
+
+ # 1. Create devices
+ dmsetup create pcache_sdb --table \
+ "0 $(blockdev --getsz /dev/sdb) pcache /dev/pmem0 /dev/sdb 4 cache_mode writeback data_crc true"
+
+ # 2. Put a filesystem on top
+ mkfs.ext4 /dev/mapper/pcache_sdb
+ mount /dev/mapper/pcache_sdb /mnt
+
+ # 3. Tune GC threshold to 80 %
+ dmsetup message pcache_sdb 0 gc_percent 80
+
+ # 4. Observe status
+ watch -n1 'dmsetup status pcache_sdb'
+
+ # 5. Shutdown
+ umount /mnt
+ dmsetup remove pcache_sdb
+
+
+``dm-pcache`` is under active development; feedback, bug reports and patches
+are very welcome!
diff --git a/Documentation/admin-guide/device-mapper/index.rst b/Documentation/admin-guide/device-mapper/index.rst
index cde52cc09645..f1c1f4b824ba 100644
--- a/Documentation/admin-guide/device-mapper/index.rst
+++ b/Documentation/admin-guide/device-mapper/index.rst
@@ -18,6 +18,7 @@ Device Mapper
dm-integrity
dm-io
dm-log
+ dm-pcache
dm-queue-length
dm-raid
dm-service-time
@@ -34,6 +35,8 @@ Device Mapper
switch
thin-provisioning
unstriped
+ vdo-design
+ vdo
verity
writecache
zero
diff --git a/Documentation/admin-guide/device-mapper/thin-provisioning.rst b/Documentation/admin-guide/device-mapper/thin-provisioning.rst
index bafebf79da4b..b2fa49a5608a 100644
--- a/Documentation/admin-guide/device-mapper/thin-provisioning.rst
+++ b/Documentation/admin-guide/device-mapper/thin-provisioning.rst
@@ -80,11 +80,11 @@ less sharing than average you'll need a larger-than-average metadata device.
As a guide, we suggest you calculate the number of bytes to use in the
metadata device as 48 * $data_dev_size / $data_block_size but round it up
-to 2MB if the answer is smaller. If you're creating large numbers of
+to 2MiB if the answer is smaller. If you're creating large numbers of
snapshots which are recording large amounts of change, you may find you
need to increase this.
-The largest size supported is 16GB: If the device is larger,
+The largest size supported is 16GiB: If the device is larger,
a warning will be issued and the excess space will not be used.
Reloading a pool table
@@ -107,13 +107,13 @@ Using an existing pool device
$data_block_size gives the smallest unit of disk space that can be
allocated at a time expressed in units of 512-byte sectors.
-$data_block_size must be between 128 (64KB) and 2097152 (1GB) and a
-multiple of 128 (64KB). $data_block_size cannot be changed after the
+$data_block_size must be between 128 (64KiB) and 2097152 (1GiB) and a
+multiple of 128 (64KiB). $data_block_size cannot be changed after the
thin-pool is created. People primarily interested in thin provisioning
-may want to use a value such as 1024 (512KB). People doing lots of
-snapshotting may want a smaller value such as 128 (64KB). If you are
+may want to use a value such as 1024 (512KiB). People doing lots of
+snapshotting may want a smaller value such as 128 (64KiB). If you are
not zeroing newly-allocated data, a larger $data_block_size in the
-region of 256000 (128MB) is suggested.
+region of 262144 (128MiB) is suggested.
$low_water_mark is expressed in blocks of size $data_block_size. If
free space on the data device drops below this level then a dm event
@@ -291,7 +291,7 @@ i) Constructor
error_if_no_space:
Error IOs, instead of queueing, if no space.
- Data block size must be between 64KB (128 sectors) and 1GB
+ Data block size must be between 64KiB (128 sectors) and 1GiB
(2097152 sectors) inclusive.
diff --git a/Documentation/admin-guide/device-mapper/vdo-design.rst b/Documentation/admin-guide/device-mapper/vdo-design.rst
new file mode 100644
index 000000000000..faa0ecd4a5ae
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/vdo-design.rst
@@ -0,0 +1,633 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+
+================
+Design of dm-vdo
+================
+
+The dm-vdo (virtual data optimizer) target provides inline deduplication,
+compression, zero-block elimination, and thin provisioning. A dm-vdo target
+can be backed by up to 256TB of storage, and can present a logical size of
+up to 4PB. This target was originally developed at Permabit Technology
+Corp. starting in 2009. It was first released in 2013 and has been used in
+production environments ever since. It was made open-source in 2017 after
+Permabit was acquired by Red Hat. This document describes the design of
+dm-vdo. For usage, see vdo.rst in the same directory as this file.
+
+Because deduplication rates fall drastically as the block size increases, a
+vdo target has a maximum block size of 4K. However, it can achieve
+deduplication rates of 254:1, i.e. up to 254 copies of a given 4K block can
+reference a single 4K of actual storage. It can achieve compression rates
+of 14:1. All zero blocks consume no storage at all.
+
+Theory of Operation
+===================
+
+The design of dm-vdo is based on the idea that deduplication is a two-part
+problem. The first is to recognize duplicate data. The second is to avoid
+storing multiple copies of those duplicates. Therefore, dm-vdo has two main
+parts: a deduplication index (called UDS) that is used to discover
+duplicate data, and a data store with a reference counted block map that
+maps from logical block addresses to the actual storage location of the
+data.
+
+Zones and Threading
+-------------------
+
+Due to the complexity of data optimization, the number of metadata
+structures involved in a single write operation to a vdo target is larger
+than most other targets. Furthermore, because vdo must operate on small
+block sizes in order to achieve good deduplication rates, acceptable
+performance can only be achieved through parallelism. Therefore, vdo's
+design attempts to be lock-free.
+
+Most of a vdo's main data structures are designed to be easily divided into
+"zones" such that any given bio must only access a single zone of any zoned
+structure. Safety with minimal locking is achieved by ensuring that during
+normal operation, each zone is assigned to a specific thread, and only that
+thread will access the portion of the data structure in that zone.
+Associated with each thread is a work queue. Each bio is associated with a
+request object (the "data_vio") which will be added to a work queue when
+the next phase of its operation requires access to the structures in the
+zone associated with that queue.
+
+Another way of thinking about this arrangement is that the work queue for
+each zone has an implicit lock on the structures it manages for all its
+operations, because vdo guarantees that no other thread will alter those
+structures.
+
+Although each structure is divided into zones, this division is not
+reflected in the on-disk representation of each data structure. Therefore,
+the number of zones for each structure, and hence the number of threads,
+can be reconfigured each time a vdo target is started.
+
+The Deduplication Index
+-----------------------
+
+In order to identify duplicate data efficiently, vdo was designed to
+leverage some common characteristics of duplicate data. From empirical
+observations, we gathered two key insights. The first is that in most data
+sets with significant amounts of duplicate data, the duplicates tend to
+have temporal locality. When a duplicate appears, it is more likely that
+other duplicates will be detected, and that those duplicates will have been
+written at about the same time. This is why the index keeps records in
+temporal order. The second insight is that new data is more likely to
+duplicate recent data than it is to duplicate older data and in general,
+there are diminishing returns to looking further back in time. Therefore,
+when the index is full, it should cull its oldest records to make space for
+new ones. Another important idea behind the design of the index is that the
+ultimate goal of deduplication is to reduce storage costs. Since there is a
+trade-off between the storage saved and the resources expended to achieve
+those savings, vdo does not attempt to find every last duplicate block. It
+is sufficient to find and eliminate most of the redundancy.
+
+Each block of data is hashed to produce a 16-byte block name. An index
+record consists of this block name paired with the presumed location of
+that data on the underlying storage. However, it is not possible to
+guarantee that the index is accurate. In the most common case, this occurs
+because it is too costly to update the index when a block is over-written
+or discarded. Doing so would require either storing the block name along
+with the blocks, which is difficult to do efficiently in block-based
+storage, or reading and rehashing each block before overwriting it.
+Inaccuracy can also result from a hash collision where two different blocks
+have the same name. In practice, this is extremely unlikely, but because
+vdo does not use a cryptographic hash, a malicious workload could be
+constructed. Because of these inaccuracies, vdo treats the locations in the
+index as hints, and reads each indicated block to verify that it is indeed
+a duplicate before sharing the existing block with a new one.
+
+Records are collected into groups called chapters. New records are added to
+the newest chapter, called the open chapter. This chapter is stored in a
+format optimized for adding and modifying records, and the content of the
+open chapter is not finalized until it runs out of space for new records.
+When the open chapter fills up, it is closed and a new open chapter is
+created to collect new records.
+
+Closing a chapter converts it to a different format which is optimized for
+reading. The records are written to a series of record pages based on the
+order in which they were received. This means that records with temporal
+locality should be on a small number of pages, reducing the I/O required to
+retrieve them. The chapter also compiles an index that indicates which
+record page contains any given name. This index means that a request for a
+name can determine exactly which record page may contain that record,
+without having to load the entire chapter from storage. This index uses
+only a subset of the block name as its key, so it cannot guarantee that an
+index entry refers to the desired block name. It can only guarantee that if
+there is a record for this name, it will be on the indicated page. Closed
+chapters are read-only structures and their contents are never altered in
+any way.
+
+Once enough records have been written to fill up all the available index
+space, the oldest chapter is removed to make space for new chapters. Any
+time a request finds a matching record in the index, that record is copied
+into the open chapter. This ensures that useful block names remain available
+in the index, while unreferenced block names are forgotten over time.
+
+In order to find records in older chapters, the index also maintains a
+higher level structure called the volume index, which contains entries
+mapping each block name to the chapter containing its newest record. This
+mapping is updated as records for the block name are copied or updated,
+ensuring that only the newest record for a given block name can be found.
+An older record for a block name will no longer be found even though it has
+not been deleted from its chapter. Like the chapter index, the volume index
+uses only a subset of the block name as its key and can not definitively
+say that a record exists for a name. It can only say which chapter would
+contain the record if a record exists. The volume index is stored entirely
+in memory and is saved to storage only when the vdo target is shut down.
+
+From the viewpoint of a request for a particular block name, it will first
+look up the name in the volume index. This search will either indicate that
+the name is new, or which chapter to search. If it returns a chapter, the
+request looks up its name in the chapter index. This will indicate either
+that the name is new, or which record page to search. Finally, if it is not
+new, the request will look for its name in the indicated record page.
+This process may require up to two page reads per request (one for the
+chapter index page and one for the request page). However, recently
+accessed pages are cached so that these page reads can be amortized across
+many block name requests.
+
+The volume index and the chapter indexes are implemented using a
+memory-efficient structure called a delta index. Instead of storing the
+entire block name (the key) for each entry, the entries are sorted by name
+and only the difference between adjacent keys (the delta) is stored.
+Because we expect the hashes to be randomly distributed, the size of the
+deltas follows an exponential distribution. Because of this distribution,
+the deltas are expressed using a Huffman code to take up even less space.
+The entire sorted list of keys is called a delta list. This structure
+allows the index to use many fewer bytes per entry than a traditional hash
+table, but it is slightly more expensive to look up entries, because a
+request must read every entry in a delta list to add up the deltas in order
+to find the record it needs. The delta index reduces this lookup cost by
+splitting its key space into many sub-lists, each starting at a fixed key
+value, so that each individual list is short.
+
+The default index size can hold 64 million records, corresponding to about
+256GB of data. This means that the index can identify duplicate data if the
+original data was written within the last 256GB of writes. This range is
+called the deduplication window. If new writes duplicate data that is older
+than that, the index will not be able to find it because the records of the
+older data have been removed. This means that if an application writes a
+200 GB file to a vdo target and then immediately writes it again, the two
+copies will deduplicate perfectly. Doing the same with a 500 GB file will
+result in no deduplication, because the beginning of the file will no
+longer be in the index by the time the second write begins (assuming there
+is no duplication within the file itself).
+
+If an application anticipates a data workload that will see useful
+deduplication beyond the 256GB threshold, vdo can be configured to use a
+larger index with a correspondingly larger deduplication window. (This
+configuration can only be set when the target is created, not altered
+later. It is important to consider the expected workload for a vdo target
+before configuring it.) There are two ways to do this.
+
+One way is to increase the memory size of the index, which also increases
+the amount of backing storage required. Doubling the size of the index will
+double the length of the deduplication window at the expense of doubling
+the storage size and the memory requirements.
+
+The other option is to enable sparse indexing. Sparse indexing increases
+the deduplication window by a factor of 10, at the expense of also
+increasing the storage size by a factor of 10. However with sparse
+indexing, the memory requirements do not increase. The trade-off is
+slightly more computation per request and a slight decrease in the amount
+of deduplication detected. For most workloads with significant amounts of
+duplicate data, sparse indexing will detect 97-99% of the deduplication
+that a standard index will detect.
+
+The vio and data_vio Structures
+-------------------------------
+
+A vio (short for Vdo I/O) is conceptually similar to a bio, with additional
+fields and data to track vdo-specific information. A struct vio maintains a
+pointer to a bio but also tracks other fields specific to the operation of
+vdo. The vio is kept separate from its related bio because there are many
+circumstances where vdo completes the bio but must continue to do work
+related to deduplication or compression.
+
+Metadata reads and writes, and other writes that originate within vdo, use
+a struct vio directly. Application reads and writes use a larger structure
+called a data_vio to track information about their progress. A struct
+data_vio contain a struct vio and also includes several other fields
+related to deduplication and other vdo features. The data_vio is the
+primary unit of application work in vdo. Each data_vio proceeds through a
+set of steps to handle the application data, after which it is reset and
+returned to a pool of data_vios for reuse.
+
+There is a fixed pool of 2048 data_vios. This number was chosen to bound
+the amount of work that is required to recover from a crash. In addition,
+benchmarks have indicated that increasing the size of the pool does not
+significantly improve performance.
+
+The Data Store
+--------------
+
+The data store is implemented by three main data structures, all of which
+work in concert to reduce or amortize metadata updates across as many data
+writes as possible.
+
+*The Slab Depot*
+
+Most of the vdo volume belongs to the slab depot. The depot contains a
+collection of slabs. The slabs can be up to 32GB, and are divided into
+three sections. Most of a slab consists of a linear sequence of 4K blocks.
+These blocks are used either to store data, or to hold portions of the
+block map (see below). In addition to the data blocks, each slab has a set
+of reference counters, using 1 byte for each data block. Finally each slab
+has a journal.
+
+Reference updates are written to the slab journal. Slab journal blocks are
+written out either when they are full, or when the recovery journal
+requests they do so in order to allow the main recovery journal (see below)
+to free up space. The slab journal is used both to ensure that the main
+recovery journal can regularly free up space, and also to amortize the cost
+of updating individual reference blocks. The reference counters are kept in
+memory and are written out, a block at a time in oldest-dirtied-order, only
+when there is a need to reclaim slab journal space. The write operations
+are performed in the background as needed so they do not add latency to
+particular I/O operations.
+
+Each slab is independent of every other. They are assigned to "physical
+zones" in round-robin fashion. If there are P physical zones, then slab n
+is assigned to zone n mod P.
+
+The slab depot maintains an additional small data structure, the "slab
+summary," which is used to reduce the amount of work needed to come back
+online after a crash. The slab summary maintains an entry for each slab
+indicating whether or not the slab has ever been used, whether all of its
+reference count updates have been persisted to storage, and approximately
+how full it is. During recovery, each physical zone will attempt to recover
+at least one slab, stopping whenever it has recovered a slab which has some
+free blocks. Once each zone has some space, or has determined that none is
+available, the target can resume normal operation in a degraded mode. Read
+and write requests can be serviced, perhaps with degraded performance,
+while the remainder of the dirty slabs are recovered.
+
+*The Block Map*
+
+The block map contains the logical to physical mapping. It can be thought
+of as an array with one entry per logical address. Each entry is 5 bytes,
+36 bits of which contain the physical block number which holds the data for
+the given logical address. The other 4 bits are used to indicate the nature
+of the mapping. Of the 16 possible states, one represents a logical address
+which is unmapped (i.e. it has never been written, or has been discarded),
+one represents an uncompressed block, and the other 14 states are used to
+indicate that the mapped data is compressed, and which of the compression
+slots in the compressed block contains the data for this logical address.
+
+In practice, the array of mapping entries is divided into "block map
+pages," each of which fits in a single 4K block. Each block map page
+consists of a header and 812 mapping entries. Each mapping page is actually
+a leaf of a radix tree which consists of block map pages at each level.
+There are 60 radix trees which are assigned to "logical zones" in round
+robin fashion. (If there are L logical zones, tree n will belong to zone n
+mod L.) At each level, the trees are interleaved, so logical addresses
+0-811 belong to tree 0, logical addresses 812-1623 belong to tree 1, and so
+on. The interleaving is maintained all the way up to the 60 root nodes.
+Choosing 60 trees results in an evenly distributed number of trees per zone
+for a large number of possible logical zone counts. The storage for the 60
+tree roots is allocated at format time. All other block map pages are
+allocated out of the slabs as needed. This flexible allocation avoids the
+need to pre-allocate space for the entire set of logical mappings and also
+makes growing the logical size of a vdo relatively easy.
+
+In operation, the block map maintains two caches. It is prohibitive to keep
+the entire leaf level of the trees in memory, so each logical zone
+maintains its own cache of leaf pages. The size of this cache is
+configurable at target start time. The second cache is allocated at start
+time, and is large enough to hold all the non-leaf pages of the entire
+block map. This cache is populated as pages are needed.
+
+*The Recovery Journal*
+
+The recovery journal is used to amortize updates across the block map and
+slab depot. Each write request causes an entry to be made in the journal.
+Entries are either "data remappings" or "block map remappings." For a data
+remapping, the journal records the logical address affected and its old and
+new physical mappings. For a block map remapping, the journal records the
+block map page number and the physical block allocated for it. Block map
+pages are never reclaimed or repurposed, so the old mapping is always 0.
+
+Each journal entry is an intent record summarizing the metadata updates
+that are required for a data_vio. The recovery journal issues a flush
+before each journal block write to ensure that the physical data for the
+new block mappings in that block are stable on storage, and journal block
+writes are all issued with the FUA bit set to ensure the recovery journal
+entries themselves are stable. The journal entry and the data write it
+represents must be stable on disk before the other metadata structures may
+be updated to reflect the operation. These entries allow the vdo device to
+reconstruct the logical to physical mappings after an unexpected
+interruption such as a loss of power.
+
+*Write Path*
+
+All write I/O to vdo is asynchronous. Each bio will be acknowledged as soon
+as vdo has done enough work to guarantee that it can complete the write
+eventually. Generally, the data for acknowledged but unflushed write I/O
+can be treated as though it is cached in memory. If an application
+requires data to be stable on storage, it must issue a flush or write the
+data with the FUA bit set like any other asynchronous I/O. Shutting down
+the vdo target will also flush any remaining I/O.
+
+Application write bios follow the steps outlined below.
+
+1. A data_vio is obtained from the data_vio pool and associated with the
+ application bio. If there are no data_vios available, the incoming bio
+ will block until a data_vio is available. This provides back pressure
+ to the application. The data_vio pool is protected by a spin lock.
+
+ The newly acquired data_vio is reset and the bio's data is copied into
+ the data_vio if it is a write and the data is not all zeroes. The data
+ must be copied because the application bio can be acknowledged before
+ the data_vio processing is complete, which means later processing steps
+ will no longer have access to the application bio. The application bio
+ may also be smaller than 4K, in which case the data_vio will have
+ already read the underlying block and the data is instead copied over
+ the relevant portion of the larger block.
+
+2. The data_vio places a claim (the "logical lock") on the logical address
+ of the bio. It is vital to prevent simultaneous modifications of the
+ same logical address, because deduplication involves sharing blocks.
+ This claim is implemented as an entry in a hashtable where the key is
+ the logical address and the value is a pointer to the data_vio
+ currently handling that address.
+
+ If a data_vio looks in the hashtable and finds that another data_vio is
+ already operating on that logical address, it waits until the previous
+ operation finishes. It also sends a message to inform the current
+ lock holder that it is waiting. Most notably, a new data_vio waiting
+ for a logical lock will flush the previous lock holder out of the
+ compression packer (step 8d) rather than allowing it to continue
+ waiting to be packed.
+
+ This stage requires the data_vio to get an implicit lock on the
+ appropriate logical zone to prevent concurrent modifications of the
+ hashtable. This implicit locking is handled by the zone divisions
+ described above.
+
+3. The data_vio traverses the block map tree to ensure that all the
+ necessary internal tree nodes have been allocated, by trying to find
+ the leaf page for its logical address. If any interior tree page is
+ missing, it is allocated at this time out of the same physical storage
+ pool used to store application data.
+
+ a. If any page-node in the tree has not yet been allocated, it must be
+ allocated before the write can continue. This step requires the
+ data_vio to lock the page-node that needs to be allocated. This
+ lock, like the logical block lock in step 2, is a hashtable entry
+ that causes other data_vios to wait for the allocation process to
+ complete.
+
+ The implicit logical zone lock is released while the allocation is
+ happening, in order to allow other operations in the same logical
+ zone to proceed. The details of allocation are the same as in
+ step 4. Once a new node has been allocated, that node is added to
+ the tree using a similar process to adding a new data block mapping.
+ The data_vio journals the intent to add the new node to the block
+ map tree (step 10), updates the reference count of the new block
+ (step 11), and reacquires the implicit logical zone lock to add the
+ new mapping to the parent tree node (step 12). Once the tree is
+ updated, the data_vio proceeds down the tree. Any other data_vios
+ waiting on this allocation also proceed.
+
+ b. In the steady-state case, the block map tree nodes will already be
+ allocated, so the data_vio just traverses the tree until it finds
+ the required leaf node. The location of the mapping (the "block map
+ slot") is recorded in the data_vio so that later steps do not need
+ to traverse the tree again. The data_vio then releases the implicit
+ logical zone lock.
+
+4. If the block is a zero block, skip to step 9. Otherwise, an attempt is
+ made to allocate a free data block. This allocation ensures that the
+ data_vio can write its data somewhere even if deduplication and
+ compression are not possible. This stage gets an implicit lock on a
+ physical zone to search for free space within that zone.
+
+ The data_vio will search each slab in a zone until it finds a free
+ block or decides there are none. If the first zone has no free space,
+ it will proceed to search the next physical zone by taking the implicit
+ lock for that zone and releasing the previous one until it finds a
+ free block or runs out of zones to search. The data_vio will acquire a
+ struct pbn_lock (the "physical block lock") on the free block. The
+ struct pbn_lock also has several fields to record the various kinds of
+ claims that data_vios can have on physical blocks. The pbn_lock is
+ added to a hashtable like the logical block locks in step 2. This
+ hashtable is also covered by the implicit physical zone lock. The
+ reference count of the free block is updated to prevent any other
+ data_vio from considering it free. The reference counters are a
+ sub-component of the slab and are thus also covered by the implicit
+ physical zone lock.
+
+5. If an allocation was obtained, the data_vio has all the resources it
+ needs to complete the write. The application bio can safely be
+ acknowledged at this point. The acknowledgment happens on a separate
+ thread to prevent the application callback from blocking other data_vio
+ operations.
+
+ If an allocation could not be obtained, the data_vio continues to
+ attempt to deduplicate or compress the data, but the bio is not
+ acknowledged because the vdo device may be out of space.
+
+6. At this point vdo must determine where to store the application data.
+ The data_vio's data is hashed and the hash (the "record name") is
+ recorded in the data_vio.
+
+7. The data_vio reserves or joins a struct hash_lock, which manages all of
+ the data_vios currently writing the same data. Active hash locks are
+ tracked in a hashtable similar to the way logical block locks are
+ tracked in step 2. This hashtable is covered by the implicit lock on
+ the hash zone.
+
+ If there is no existing hash lock for this data_vio's record_name, the
+ data_vio obtains a hash lock from the pool, adds it to the hashtable,
+ and sets itself as the new hash lock's "agent." The hash_lock pool is
+ also covered by the implicit hash zone lock. The hash lock agent will
+ do all the work to decide where the application data will be
+ written. If a hash lock for the data_vio's record_name already exists,
+ and the data_vio's data is the same as the agent's data, the new
+ data_vio will wait for the agent to complete its work and then share
+ its result.
+
+ In the rare case that a hash lock exists for the data_vio's hash but
+ the data does not match the hash lock's agent, the data_vio skips to
+ step 8h and attempts to write its data directly. This can happen if two
+ different data blocks produce the same hash, for example.
+
+8. The hash lock agent attempts to deduplicate or compress its data with
+ the following steps.
+
+ a. The agent initializes and sends its embedded deduplication request
+ (struct uds_request) to the deduplication index. This does not
+ require the data_vio to get any locks because the index components
+ manage their own locking. The data_vio waits until it either gets a
+ response from the index or times out.
+
+ b. If the deduplication index returns advice, the data_vio attempts to
+ obtain a physical block lock on the indicated physical address, in
+ order to read the data and verify that it is the same as the
+ data_vio's data, and that it can accept more references. If the
+ physical address is already locked by another data_vio, the data at
+ that address may soon be overwritten so it is not safe to use the
+ address for deduplication.
+
+ c. If the data matches and the physical block can add references, the
+ agent and any other data_vios waiting on it will record this
+ physical block as their new physical address and proceed to step 9
+ to record their new mapping. If there are more data_vios in the hash
+ lock than there are references available, one of the remaining
+ data_vios becomes the new agent and continues to step 8d as if no
+ valid advice was returned.
+
+ d. If no usable duplicate block was found, the agent first checks that
+ it has an allocated physical block (from step 3) that it can write
+ to. If the agent does not have an allocation, some other data_vio in
+ the hash lock that does have an allocation takes over as agent. If
+ none of the data_vios have an allocated physical block, these writes
+ are out of space, so they proceed to step 13 for cleanup.
+
+ e. The agent attempts to compress its data. If the data does not
+ compress, the data_vio will continue to step 8h to write its data
+ directly.
+
+ If the compressed size is small enough, the agent will release the
+ implicit hash zone lock and go to the packer (struct packer) where
+ it will be placed in a bin (struct packer_bin) along with other
+ data_vios. All compression operations require the implicit lock on
+ the packer zone.
+
+ The packer can combine up to 14 compressed blocks in a single 4k
+ data block. Compression is only helpful if vdo can pack at least 2
+ data_vios into a single data block. This means that a data_vio may
+ wait in the packer for an arbitrarily long time for other data_vios
+ to fill out the compressed block. There is a mechanism for vdo to
+ evict waiting data_vios when continuing to wait would cause
+ problems. Circumstances causing an eviction include an application
+ flush, device shutdown, or a subsequent data_vio trying to overwrite
+ the same logical block address. A data_vio may also be evicted from
+ the packer if it cannot be paired with any other compressed block
+ before more compressible blocks need to use its bin. An evicted
+ data_vio will proceed to step 8h to write its data directly.
+
+ f. If the agent fills a packer bin, either because all 14 of its slots
+ are used or because it has no remaining space, it is written out
+ using the allocated physical block from one of its data_vios. Step
+ 8d has already ensured that an allocation is available.
+
+ g. Each data_vio sets the compressed block as its new physical address.
+ The data_vio obtains an implicit lock on the physical zone and
+ acquires the struct pbn_lock for the compressed block, which is
+ modified to be a shared lock. Then it releases the implicit physical
+ zone lock and proceeds to step 8i.
+
+ h. Any data_vio evicted from the packer will have an allocation from
+ step 3. It will write its data to that allocated physical block.
+
+ i. After the data is written, if the data_vio is the agent of a hash
+ lock, it will reacquire the implicit hash zone lock and share its
+ physical address with as many other data_vios in the hash lock as
+ possible. Each data_vio will then proceed to step 9 to record its
+ new mapping.
+
+ j. If the agent actually wrote new data (whether compressed or not),
+ the deduplication index is updated to reflect the location of the
+ new data. The agent then releases the implicit hash zone lock.
+
+9. The data_vio determines the previous mapping of the logical address.
+ There is a cache for block map leaf pages (the "block map cache"),
+ because there are usually too many block map leaf nodes to store
+ entirely in memory. If the desired leaf page is not in the cache, the
+ data_vio will reserve a slot in the cache and load the desired page
+ into it, possibly evicting an older cached page. The data_vio then
+ finds the current physical address for this logical address (the "old
+ physical mapping"), if any, and records it. This step requires a lock
+ on the block map cache structures, covered by the implicit logical zone
+ lock.
+
+10. The data_vio makes an entry in the recovery journal containing the
+ logical block address, the old physical mapping, and the new physical
+ mapping. Making this journal entry requires holding the implicit
+ recovery journal lock. The data_vio will wait in the journal until all
+ recovery blocks up to the one containing its entry have been written
+ and flushed to ensure the transaction is stable on storage.
+
+11. Once the recovery journal entry is stable, the data_vio makes two slab
+ journal entries: an increment entry for the new mapping, and a
+ decrement entry for the old mapping. These two operations each require
+ holding a lock on the affected physical slab, covered by its implicit
+ physical zone lock. For correctness during recovery, the slab journal
+ entries in any given slab journal must be in the same order as the
+ corresponding recovery journal entries. Therefore, if the two entries
+ are in different zones, they are made concurrently, and if they are in
+ the same zone, the increment is always made before the decrement in
+ order to avoid underflow. After each slab journal entry is made in
+ memory, the associated reference count is also updated in memory.
+
+12. Once both of the reference count updates are done, the data_vio
+ acquires the implicit logical zone lock and updates the
+ logical-to-physical mapping in the block map to point to the new
+ physical block. At this point the write operation is complete.
+
+13. If the data_vio has a hash lock, it acquires the implicit hash zone
+ lock and releases its hash lock to the pool.
+
+ The data_vio then acquires the implicit physical zone lock and releases
+ the struct pbn_lock it holds for its allocated block. If it had an
+ allocation that it did not use, it also sets the reference count for
+ that block back to zero to free it for use by subsequent data_vios.
+
+ The data_vio then acquires the implicit logical zone lock and releases
+ the logical block lock acquired in step 2.
+
+ The application bio is then acknowledged if it has not previously been
+ acknowledged, and the data_vio is returned to the pool.
+
+*Read Path*
+
+An application read bio follows a much simpler set of steps. It does steps
+1 and 2 in the write path to obtain a data_vio and lock its logical
+address. If there is already a write data_vio in progress for that logical
+address that is guaranteed to complete, the read data_vio will copy the
+data from the write data_vio and return it. Otherwise, it will look up the
+logical-to-physical mapping by traversing the block map tree as in step 3,
+and then read and possibly decompress the indicated data at the indicated
+physical block address. A read data_vio will not allocate block map tree
+nodes if they are missing. If the interior block map nodes do not exist
+yet, the logical block map address must still be unmapped and the read
+data_vio will return all zeroes. A read data_vio handles cleanup and
+acknowledgment as in step 13, although it only needs to release the logical
+lock and return itself to the pool.
+
+*Small Writes*
+
+All storage within vdo is managed as 4KB blocks, but it can accept writes
+as small as 512 bytes. Processing a write that is smaller than 4K requires
+a read-modify-write operation that reads the relevant 4K block, copies the
+new data over the appropriate sectors of the block, and then launches a
+write operation for the modified data block. The read and write stages of
+this operation are nearly identical to the normal read and write
+operations, and a single data_vio is used throughout this operation.
+
+*Recovery*
+
+When a vdo is restarted after a crash, it will attempt to recover from the
+recovery journal. During the pre-resume phase of the next start, the
+recovery journal is read. The increment portion of valid entries are played
+into the block map. Next, valid entries are played, in order as required,
+into the slab journals. Finally, each physical zone attempts to replay at
+least one slab journal to reconstruct the reference counts of one slab.
+Once each zone has some free space (or has determined that it has none),
+the vdo comes back online, while the remainder of the slab journals are
+used to reconstruct the rest of the reference counts in the background.
+
+*Read-only Rebuild*
+
+If a vdo encounters an unrecoverable error, it will enter read-only mode.
+This mode indicates that some previously acknowledged data may have been
+lost. The vdo may be instructed to rebuild as best it can in order to
+return to a writable state. However, this is never done automatically due
+to the possibility that data has been lost. During a read-only rebuild, the
+block map is recovered from the recovery journal as before. However, the
+reference counts are not rebuilt from the slab journals. Instead, the
+reference counts are zeroed, the entire block map is traversed, and the
+reference counts are updated from the block mappings. While this may lose
+some data, it ensures that the block map and reference counts are
+consistent with each other. This allows vdo to resume normal operation and
+accept further writes.
diff --git a/Documentation/admin-guide/device-mapper/vdo.rst b/Documentation/admin-guide/device-mapper/vdo.rst
new file mode 100644
index 000000000000..8a67b320a97b
--- /dev/null
+++ b/Documentation/admin-guide/device-mapper/vdo.rst
@@ -0,0 +1,413 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+
+======
+dm-vdo
+======
+
+The dm-vdo (virtual data optimizer) device mapper target provides
+block-level deduplication, compression, and thin provisioning. As a device
+mapper target, it can add these features to the storage stack, compatible
+with any file system. The vdo target does not protect against data
+corruption, relying instead on integrity protection of the storage below
+it. It is strongly recommended that lvm be used to manage vdo volumes. See
+lvmvdo(7).
+
+Userspace component
+===================
+
+Formatting a vdo volume requires the use of the 'vdoformat' tool, available
+at:
+
+https://github.com/dm-vdo/vdo/
+
+In most cases, a vdo target will recover from a crash automatically the
+next time it is started. In cases where it encountered an unrecoverable
+error (either during normal operation or crash recovery) the target will
+enter or come up in read-only mode. Because read-only mode is indicative of
+data-loss, a positive action must be taken to bring vdo out of read-only
+mode. The 'vdoforcerebuild' tool, available from the same repo, is used to
+prepare a read-only vdo to exit read-only mode. After running this tool,
+the vdo target will rebuild its metadata the next time it is
+started. Although some data may be lost, the rebuilt vdo's metadata will be
+internally consistent and the target will be writable again.
+
+The repo also contains additional userspace tools which can be used to
+inspect a vdo target's on-disk metadata. Fortunately, these tools are
+rarely needed except by dm-vdo developers.
+
+Metadata requirements
+=====================
+
+Each vdo volume reserves 3GB of space for metadata, or more depending on
+its configuration. It is helpful to check that the space saved by
+deduplication and compression is not cancelled out by the metadata
+requirements. An estimation of the space saved for a specific dataset can
+be computed with the vdo estimator tool, which is available at:
+
+https://github.com/dm-vdo/vdoestimator/
+
+Target interface
+================
+
+Table line
+----------
+
+::
+
+ <offset> <logical device size> vdo V4 <storage device>
+ <storage device size> <minimum I/O size> <block map cache size>
+ <block map era length> [optional arguments]
+
+
+Required parameters:
+
+ offset:
+ The offset, in sectors, at which the vdo volume's logical
+ space begins.
+
+ logical device size:
+ The size of the device which the vdo volume will service,
+ in sectors. Must match the current logical size of the vdo
+ volume.
+
+ storage device:
+ The device holding the vdo volume's data and metadata.
+
+ storage device size:
+ The size of the device holding the vdo volume, as a number
+ of 4096-byte blocks. Must match the current size of the vdo
+ volume.
+
+ minimum I/O size:
+ The minimum I/O size for this vdo volume to accept, in
+ bytes. Valid values are 512 or 4096. The recommended value
+ is 4096.
+
+ block map cache size:
+ The size of the block map cache, as a number of 4096-byte
+ blocks. The minimum and recommended value is 32768 blocks.
+ If the logical thread count is non-zero, the cache size
+ must be at least 4096 blocks per logical thread.
+
+ block map era length:
+ The speed with which the block map cache writes out
+ modified block map pages. A smaller era length is likely to
+ reduce the amount of time spent rebuilding, at the cost of
+ increased block map writes during normal operation. The
+ maximum and recommended value is 16380; the minimum value
+ is 1.
+
+Optional parameters:
+--------------------
+Some or all of these parameters may be specified as <key> <value> pairs.
+
+Thread related parameters:
+
+Different categories of work are assigned to separate thread groups, and
+the number of threads in each group can be configured separately.
+
+If <hash>, <logical>, and <physical> are all set to 0, the work handled by
+all three thread types will be handled by a single thread. If any of these
+values are non-zero, all of them must be non-zero.
+
+ ack:
+ The number of threads used to complete bios. Since
+ completing a bio calls an arbitrary completion function
+ outside the vdo volume, threads of this type allow the vdo
+ volume to continue processing requests even when bio
+ completion is slow. The default is 1.
+
+ bio:
+ The number of threads used to issue bios to the underlying
+ storage. Threads of this type allow the vdo volume to
+ continue processing requests even when bio submission is
+ slow. The default is 4.
+
+ bioRotationInterval:
+ The number of bios to enqueue on each bio thread before
+ switching to the next thread. The value must be greater
+ than 0 and not more than 1024; the default is 64.
+
+ cpu:
+ The number of threads used to do CPU-intensive work, such
+ as hashing and compression. The default is 1.
+
+ hash:
+ The number of threads used to manage data comparisons for
+ deduplication based on the hash value of data blocks. The
+ default is 0.
+
+ logical:
+ The number of threads used to manage caching and locking
+ based on the logical address of incoming bios. The default
+ is 0; the maximum is 60.
+
+ physical:
+ The number of threads used to manage administration of the
+ underlying storage device. At format time, a slab size for
+ the vdo is chosen; the vdo storage device must be large
+ enough to have at least 1 slab per physical thread. The
+ default is 0; the maximum is 16.
+
+Miscellaneous parameters:
+
+ maxDiscard:
+ The maximum size of discard bio accepted, in 4096-byte
+ blocks. I/O requests to a vdo volume are normally split
+ into 4096-byte blocks, and processed up to 2048 at a time.
+ However, discard requests to a vdo volume can be
+ automatically split to a larger size, up to <maxDiscard>
+ 4096-byte blocks in a single bio, and are limited to 1500
+ at a time. Increasing this value may provide better overall
+ performance, at the cost of increased latency for the
+ individual discard requests. The default and minimum is 1;
+ the maximum is UINT_MAX / 4096.
+
+ deduplication:
+ Whether deduplication is enabled. The default is 'on'; the
+ acceptable values are 'on' and 'off'.
+
+ compression:
+ Whether compression is enabled. The default is 'off'; the
+ acceptable values are 'on' and 'off'.
+
+Device modification
+-------------------
+
+A modified table may be loaded into a running, non-suspended vdo volume.
+The modifications will take effect when the device is next resumed. The
+modifiable parameters are <logical device size>, <physical device size>,
+<maxDiscard>, <compression>, and <deduplication>.
+
+If the logical device size or physical device size are changed, upon
+successful resume vdo will store the new values and require them on future
+startups. These two parameters may not be decreased. The logical device
+size may not exceed 4 PB. The physical device size must increase by at
+least 32832 4096-byte blocks if at all, and must not exceed the size of the
+underlying storage device. Additionally, when formatting the vdo device, a
+slab size is chosen: the physical device size may never increase above the
+size which provides 8192 slabs, and each increase must be large enough to
+add at least one new slab.
+
+Examples:
+
+Start a previously-formatted vdo volume with 1 GB logical space and 1 GB
+physical space, storing to /dev/dm-1 which has more than 1 GB of space.
+
+::
+
+ dmsetup create vdo0 --table \
+ "0 2097152 vdo V4 /dev/dm-1 262144 4096 32768 16380"
+
+Grow the logical size to 4 GB.
+
+::
+
+ dmsetup reload vdo0 --table \
+ "0 8388608 vdo V4 /dev/dm-1 262144 4096 32768 16380"
+ dmsetup resume vdo0
+
+Grow the physical size to 2 GB.
+
+::
+
+ dmsetup reload vdo0 --table \
+ "0 8388608 vdo V4 /dev/dm-1 524288 4096 32768 16380"
+ dmsetup resume vdo0
+
+Grow the physical size by 1 GB more and increase max discard sectors.
+
+::
+
+ dmsetup reload vdo0 --table \
+ "0 10485760 vdo V4 /dev/dm-1 786432 4096 32768 16380 maxDiscard 8"
+ dmsetup resume vdo0
+
+Stop the vdo volume.
+
+::
+
+ dmsetup remove vdo0
+
+Start the vdo volume again. Note that the logical and physical device sizes
+must still match, but other parameters can change.
+
+::
+
+ dmsetup create vdo1 --table \
+ "0 10485760 vdo V4 /dev/dm-1 786432 512 65550 5000 hash 1 logical 3 physical 2"
+
+Messages
+--------
+All vdo devices accept messages in the form:
+
+::
+
+ dmsetup message <target-name> 0 <message-name> <message-parameters>
+
+The messages are:
+
+ stats:
+ Outputs the current view of the vdo statistics. Mostly used
+ by the vdostats userspace program to interpret the output
+ buffer.
+
+ config:
+ Outputs useful vdo configuration information. Mostly used
+ by users who want to recreate a similar VDO volume and
+ want to know the creation configuration used.
+
+ dump:
+ Dumps many internal structures to the system log. This is
+ not always safe to run, so it should only be used to debug
+ a hung vdo. Optional parameters to specify structures to
+ dump are:
+
+ viopool: The pool of I/O requests incoming bios
+ pools: A synonym of 'viopool'
+ vdo: Most of the structures managing on-disk data
+ queues: Basic information about each vdo thread
+ threads: A synonym of 'queues'
+ default: Equivalent to 'queues vdo'
+ all: All of the above.
+
+ dump-on-shutdown:
+ Perform a default dump next time vdo shuts down.
+
+
+Status
+------
+
+::
+
+ <device> <operating mode> <in recovery> <index state>
+ <compression state> <physical blocks used> <total physical blocks>
+
+ device:
+ The name of the vdo volume.
+
+ operating mode:
+ The current operating mode of the vdo volume; values may be
+ 'normal', 'recovering' (the volume has detected an issue
+ with its metadata and is attempting to repair itself), and
+ 'read-only' (an error has occurred that forces the vdo
+ volume to only support read operations and not writes).
+
+ in recovery:
+ Whether the vdo volume is currently in recovery mode;
+ values may be 'recovering' or '-' which indicates not
+ recovering.
+
+ index state:
+ The current state of the deduplication index in the vdo
+ volume; values may be 'closed', 'closing', 'error',
+ 'offline', 'online', 'opening', and 'unknown'.
+
+ compression state:
+ The current state of compression in the vdo volume; values
+ may be 'offline' and 'online'.
+
+ used physical blocks:
+ The number of physical blocks in use by the vdo volume.
+
+ total physical blocks:
+ The total number of physical blocks the vdo volume may use;
+ the difference between this value and the
+ <used physical blocks> is the number of blocks the vdo
+ volume has left before being full.
+
+Memory Requirements
+===================
+
+A vdo target requires a fixed 38 MB of RAM along with the following amounts
+that scale with the target:
+
+- 1.15 MB of RAM for each 1 MB of configured block map cache size. The
+ block map cache requires a minimum of 150 MB.
+- 1.6 MB of RAM for each 1 TB of logical space.
+- 268 MB of RAM for each 1 TB of physical storage managed by the volume.
+
+The deduplication index requires additional memory which scales with the
+size of the deduplication window. For dense indexes, the index requires 1
+GB of RAM per 1 TB of window. For sparse indexes, the index requires 1 GB
+of RAM per 10 TB of window. The index configuration is set when the target
+is formatted and may not be modified.
+
+Module Parameters
+=================
+
+The vdo driver has a numeric parameter 'log_level' which controls the
+verbosity of logging from the driver. The default setting is 6
+(LOGLEVEL_INFO and more severe messages).
+
+Run-time Usage
+==============
+
+When using dm-vdo, it is important to be aware of the ways in which its
+behavior differs from other storage targets.
+
+- There is no guarantee that over-writes of existing blocks will succeed.
+ Because the underlying storage may be multiply referenced, over-writing
+ an existing block generally requires a vdo to have a free block
+ available.
+
+- When blocks are no longer in use, sending a discard request for those
+ blocks lets the vdo release references for those blocks. If the vdo is
+ thinly provisioned, discarding unused blocks is essential to prevent the
+ target from running out of space. However, due to the sharing of
+ duplicate blocks, no discard request for any given logical block is
+ guaranteed to reclaim space.
+
+- Assuming the underlying storage properly implements flush requests, vdo
+ is resilient against crashes, however, unflushed writes may or may not
+ persist after a crash.
+
+- Each write to a vdo target entails a significant amount of processing.
+ However, much of the work is paralellizable. Therefore, vdo targets
+ achieve better throughput at higher I/O depths, and can support up 2048
+ requests in parallel.
+
+Tuning
+======
+
+The vdo device has many options, and it can be difficult to make optimal
+choices without perfect knowledge of the workload. Additionally, most
+configuration options must be set when a vdo target is started, and cannot
+be changed without shutting it down completely; the configuration cannot be
+changed while the target is active. Ideally, tuning with simulated
+workloads should be performed before deploying vdo in production
+environments.
+
+The most important value to adjust is the block map cache size. In order to
+service a request for any logical address, a vdo must load the portion of
+the block map which holds the relevant mapping. These mappings are cached.
+Performance will suffer when the working set does not fit in the cache. By
+default, a vdo allocates 128 MB of metadata cache in RAM to support
+efficient access to 100 GB of logical space at a time. It should be scaled
+up proportionally for larger working sets.
+
+The logical and physical thread counts should also be adjusted. A logical
+thread controls a disjoint section of the block map, so additional logical
+threads increase parallelism and can increase throughput. Physical threads
+control a disjoint section of the data blocks, so additional physical
+threads can also increase throughput. However, excess threads can waste
+resources and increase contention.
+
+Bio submission threads control the parallelism involved in sending I/O to
+the underlying storage; fewer threads mean there is more opportunity to
+reorder I/O requests for performance benefit, but also that each I/O
+request has to wait longer before being submitted.
+
+Bio acknowledgment threads are used for finishing I/O requests. This is
+done on dedicated threads since the amount of work required to execute a
+bio's callback can not be controlled by the vdo itself. Usually one thread
+is sufficient but additional threads may be beneficial, particularly when
+bios have CPU-heavy callbacks.
+
+CPU threads are used for hashing and for compression; in workloads with
+compression enabled, more threads may result in higher throughput.
+
+Hash threads are used to sort active requests by hash and determine whether
+they should deduplicate; the most CPU intensive actions done by these
+threads are comparison of 4096-byte data blocks. In most cases, a single
+hash thread is sufficient.
diff --git a/Documentation/admin-guide/device-mapper/verity.rst b/Documentation/admin-guide/device-mapper/verity.rst
index a65c1602cb23..8c3f1f967a3c 100644
--- a/Documentation/admin-guide/device-mapper/verity.rst
+++ b/Documentation/admin-guide/device-mapper/verity.rst
@@ -87,6 +87,15 @@ panic_on_corruption
Panic the device when a corrupted block is discovered. This option is
not compatible with ignore_corruption and restart_on_corruption.
+restart_on_error
+ Restart the system when an I/O error is detected.
+ This option can be combined with the restart_on_corruption option.
+
+panic_on_error
+ Panic the device when an I/O error is detected. This option is
+ not compatible with the restart_on_error option but can be combined
+ with the panic_on_corruption option.
+
ignore_zero_blocks
Do not verify blocks that are expected to contain zeroes and always return
zeroes instead. This may be useful if the partition contains unused blocks
@@ -142,8 +151,15 @@ root_hash_sig_key_desc <key_description>
already in the secondary trusted keyring.
try_verify_in_tasklet
- If verity hashes are in cache, verify data blocks in kernel tasklet instead
- of workqueue. This option can reduce IO latency.
+ If verity hashes are in cache and the IO size does not exceed the limit,
+ verify data blocks in bottom half instead of workqueue. This option can
+ reduce IO latency. The size limits can be configured via
+ /sys/module/dm_verity/parameters/use_bh_bytes. The four parameters
+ correspond to limits for IOPRIO_CLASS_NONE, IOPRIO_CLASS_RT,
+ IOPRIO_CLASS_BE and IOPRIO_CLASS_IDLE in turn.
+ For example:
+ <none>,<rt>,<be>,<idle>
+ 4096,4096,4096,4096
Theory of operation
===================
diff --git a/Documentation/admin-guide/dynamic-debug-howto.rst b/Documentation/admin-guide/dynamic-debug-howto.rst
index 0e9b48daf690..095a63892257 100644
--- a/Documentation/admin-guide/dynamic-debug-howto.rst
+++ b/Documentation/admin-guide/dynamic-debug-howto.rst
@@ -26,6 +26,11 @@ Dynamic debug provides:
- format string
- class name (as known/declared by each module)
+NOTE: To actually get the debug-print output on the console, you may
+need to adjust the kernel ``loglevel=``, or use ``ignore_loglevel``.
+Read about these kernel parameters in
+Documentation/admin-guide/kernel-parameters.rst.
+
Viewing Dynamic Debug Behaviour
===============================
@@ -218,12 +223,13 @@ The flags are::
f Include the function name
s Include the source file name
l Include line number
+ d Include call trace
For ``print_hex_dump_debug()`` and ``print_hex_dump_bytes()``, only
the ``p`` flag has meaning, other flags are ignored.
-Note the regexp ``^[-+=][fslmpt_]+$`` matches a flags specification.
-To clear all flags at once, use ``=_`` or ``-fslmpt``.
+Note the regexp ``^[-+=][fslmptd_]+$`` matches a flags specification.
+To clear all flags at once, use ``=_`` or ``-fslmptd``.
Debug messages during Boot Process
diff --git a/Documentation/admin-guide/edid.rst b/Documentation/admin-guide/edid.rst
index 80deeb21a265..1a9b965aa486 100644
--- a/Documentation/admin-guide/edid.rst
+++ b/Documentation/admin-guide/edid.rst
@@ -24,37 +24,4 @@ restrictions later on.
As a remedy for such situations, the kernel configuration item
CONFIG_DRM_LOAD_EDID_FIRMWARE was introduced. It allows to provide an
individually prepared or corrected EDID data set in the /lib/firmware
-directory from where it is loaded via the firmware interface. The code
-(see drivers/gpu/drm/drm_edid_load.c) contains built-in data sets for
-commonly used screen resolutions (800x600, 1024x768, 1280x1024, 1600x1200,
-1680x1050, 1920x1080) as binary blobs, but the kernel source tree does
-not contain code to create these data. In order to elucidate the origin
-of the built-in binary EDID blobs and to facilitate the creation of
-individual data for a specific misbehaving monitor, commented sources
-and a Makefile environment are given here.
-
-To create binary EDID and C source code files from the existing data
-material, simply type "make" in tools/edid/.
-
-If you want to create your own EDID file, copy the file 1024x768.S,
-replace the settings with your own data and add a new target to the
-Makefile. Please note that the EDID data structure expects the timing
-values in a different way as compared to the standard X11 format.
-
-X11:
- HTimings:
- hdisp hsyncstart hsyncend htotal
- VTimings:
- vdisp vsyncstart vsyncend vtotal
-
-EDID::
-
- #define XPIX hdisp
- #define XBLANK htotal-hdisp
- #define XOFFSET hsyncstart-hdisp
- #define XPULSE hsyncend-hsyncstart
-
- #define YPIX vdisp
- #define YBLANK vtotal-vdisp
- #define YOFFSET vsyncstart-vdisp
- #define YPULSE vsyncend-vsyncstart
+directory from where it is loaded via the firmware interface.
diff --git a/Documentation/admin-guide/efi-stub.rst b/Documentation/admin-guide/efi-stub.rst
index 090f3a185e18..f8e7407698bd 100644
--- a/Documentation/admin-guide/efi-stub.rst
+++ b/Documentation/admin-guide/efi-stub.rst
@@ -79,6 +79,9 @@ because the image we're executing is interpreted by the EFI shell,
which understands relative paths, whereas the rest of the command line
is passed to bzImage.efi.
+.. hint::
+ It is also possible to provide an initrd using a Linux-specific UEFI
+ protocol at boot time. See :ref:`pe-coff-entry-point` for details.
The "dtb=" option
-----------------
diff --git a/Documentation/admin-guide/ext4.rst b/Documentation/admin-guide/ext4.rst
index 5740d85439ff..ac0c709ea9e7 100644
--- a/Documentation/admin-guide/ext4.rst
+++ b/Documentation/admin-guide/ext4.rst
@@ -212,16 +212,6 @@ When mounting an ext4 filesystem, the following option are accepted:
that ext4's inode table readahead algorithm will pre-read into the
buffer cache. The default value is 32 blocks.
- nouser_xattr
- Disables Extended User Attributes. See the attr(5) manual page for
- more information about extended attributes.
-
- noacl
- This option disables POSIX Access Control List support. If ACL support
- is enabled in the kernel configuration (CONFIG_EXT4_FS_POSIX_ACL), ACL
- is enabled by default on mount. See the acl(5) manual page for more
- information about acl.
-
bsddf (*)
Make 'df' act like BSD.
@@ -248,11 +238,10 @@ When mounting an ext4 filesystem, the following option are accepted:
configured using tune2fs)
data_err=ignore(*)
- Just print an error message if an error occurs in a file data buffer in
- ordered mode.
+ Just print an error message if an error occurs in a file data buffer.
+
data_err=abort
- Abort the journal if an error occurs in a file data buffer in ordered
- mode.
+ Abort the journal if an error occurs in a file data buffer.
grpid | bsdgroups
New objects have the group ID of their parent.
@@ -409,7 +398,7 @@ There are 3 different data modes:
* writeback mode
In data=writeback mode, ext4 does not journal data at all. This mode provides
- a similar level of journaling as that of XFS, JFS, and ReiserFS in its default
+ a similar level of journaling as that of XFS and JFS in its default
mode - metadata journaling. A crash+recovery can cause incorrect data to
appear in files which were written shortly before the crash. This mode will
typically provide the best ext4 performance.
diff --git a/Documentation/admin-guide/gpio/gpio-aggregator.rst b/Documentation/admin-guide/gpio/gpio-aggregator.rst
index 5cd1e7221756..8374a9df9105 100644
--- a/Documentation/admin-guide/gpio/gpio-aggregator.rst
+++ b/Documentation/admin-guide/gpio/gpio-aggregator.rst
@@ -69,6 +69,113 @@ write-only attribute files in sysfs.
$ echo gpio-aggregator.0 > delete_device
+Aggregating GPIOs using Configfs
+--------------------------------
+
+**Group:** ``/config/gpio-aggregator``
+
+ This is the root directory of the gpio-aggregator configfs tree.
+
+**Group:** ``/config/gpio-aggregator/<example-name>``
+
+ This directory represents a GPIO aggregator device. You can assign any
+ name to ``<example-name>`` (e.g. ``agg0``), except names starting with
+ ``_sysfs`` prefix, which are reserved for auto-generated configfs
+ entries corresponding to devices created via Sysfs.
+
+**Attribute:** ``/config/gpio-aggregator/<example-name>/live``
+
+ The ``live`` attribute allows to trigger the actual creation of the device
+ once it's fully configured. Accepted values are:
+
+ * ``1``, ``yes``, ``true`` : enable the virtual device
+ * ``0``, ``no``, ``false`` : disable the virtual device
+
+**Attribute:** ``/config/gpio-aggregator/<example-name>/dev_name``
+
+ The read-only ``dev_name`` attribute exposes the name of the device as it
+ will appear in the system on the platform bus (e.g. ``gpio-aggregator.0``).
+ This is useful for identifying a character device for the newly created
+ aggregator. If it's ``gpio-aggregator.0``,
+ ``/sys/devices/platform/gpio-aggregator.0/gpiochipX`` path tells you that the
+ GPIO device id is ``X``.
+
+You must create subdirectories for each virtual line you want to
+instantiate, named exactly as ``line0``, ``line1``, ..., ``lineY``, when
+you want to instantiate ``Y+1`` (Y >= 0) lines. Configure all lines before
+activating the device by setting ``live`` to 1.
+
+**Group:** ``/config/gpio-aggregator/<example-name>/<lineY>/``
+
+ This directory represents a GPIO line to include in the aggregator.
+
+**Attribute:** ``/config/gpio-aggregator/<example-name>/<lineY>/key``
+
+**Attribute:** ``/config/gpio-aggregator/<example-name>/<lineY>/offset``
+
+ The default values after creating the ``<lineY>`` directory are:
+
+ * ``key`` : <empty>
+ * ``offset`` : -1
+
+ ``key`` must always be explicitly configured, while ``offset`` depends.
+ Two configuration patterns exist for each ``<lineY>``:
+
+ (a). For lookup by GPIO line name:
+
+ * Set ``key`` to the line name.
+ * Ensure ``offset`` remains -1 (the default).
+
+ (b). For lookup by GPIO chip name and the line offset within the chip:
+
+ * Set ``key`` to the chip name.
+ * Set ``offset`` to the line offset (0 <= ``offset`` < 65535).
+
+**Attribute:** ``/config/gpio-aggregator/<example-name>/<lineY>/name``
+
+ The ``name`` attribute sets a custom name for lineY. If left unset, the
+ line will remain unnamed.
+
+Once the configuration is done, the ``'live'`` attribute must be set to 1
+in order to instantiate the aggregator device. It can be set back to 0 to
+destroy the virtual device. The module will synchronously wait for the new
+aggregator device to be successfully probed and if this doesn't happen, writing
+to ``'live'`` will result in an error. This is a different behaviour from the
+case when you create it using sysfs ``new_device`` interface.
+
+.. note::
+
+ For aggregators created via Sysfs, the configfs entries are
+ auto-generated and appear as ``/config/gpio-aggregator/_sysfs.<N>/``. You
+ cannot add or remove line directories with mkdir(2)/rmdir(2). To modify
+ lines, you must use the "delete_device" interface to tear down the
+ existing device and reconfigure it from scratch. However, you can still
+ toggle the aggregator with the ``live`` attribute and adjust the
+ ``key``, ``offset``, and ``name`` attributes for each line when ``live``
+ is set to 0 by hand (i.e. it's not waiting for deferred probe).
+
+Sample configuration commands
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: sh
+
+ # Create a directory for an aggregator device
+ $ mkdir /sys/kernel/config/gpio-aggregator/agg0
+
+ # Configure each line
+ $ mkdir /sys/kernel/config/gpio-aggregator/agg0/line0
+ $ echo gpiochip0 > /sys/kernel/config/gpio-aggregator/agg0/line0/key
+ $ echo 6 > /sys/kernel/config/gpio-aggregator/agg0/line0/offset
+ $ echo test0 > /sys/kernel/config/gpio-aggregator/agg0/line0/name
+ $ mkdir /sys/kernel/config/gpio-aggregator/agg0/line1
+ $ echo gpiochip0 > /sys/kernel/config/gpio-aggregator/agg0/line1/key
+ $ echo 7 > /sys/kernel/config/gpio-aggregator/agg0/line1/offset
+ $ echo test1 > /sys/kernel/config/gpio-aggregator/agg0/line1/name
+
+ # Activate the aggregator device
+ $ echo 1 > /sys/kernel/config/gpio-aggregator/agg0/live
+
+
Generic GPIO Driver
-------------------
diff --git a/Documentation/admin-guide/gpio/gpio-mockup.rst b/Documentation/admin-guide/gpio/gpio-mockup.rst
index 493071da1738..d6e7438a7550 100644
--- a/Documentation/admin-guide/gpio/gpio-mockup.rst
+++ b/Documentation/admin-guide/gpio/gpio-mockup.rst
@@ -3,6 +3,14 @@
GPIO Testing Driver
===================
+.. note::
+
+ This module has been obsoleted by the more flexible gpio-sim.rst.
+ New developments should use that API and existing developments are
+ encouraged to migrate as soon as possible.
+ This module will continue to be maintained but no new features will be
+ added.
+
The GPIO Testing Driver (gpio-mockup) provides a way to create simulated GPIO
chips for testing purposes. The lines exposed by these chips can be accessed
using the standard GPIO character device interface as well as manipulated
diff --git a/Documentation/admin-guide/gpio/gpio-sim.rst b/Documentation/admin-guide/gpio/gpio-sim.rst
index 1cc5567a4bbe..f5135a14ef2e 100644
--- a/Documentation/admin-guide/gpio/gpio-sim.rst
+++ b/Documentation/admin-guide/gpio/gpio-sim.rst
@@ -50,8 +50,11 @@ the number of lines exposed by this bank.
**Attribute:** ``/config/gpio-sim/gpio-device/gpio-bankX/lineY/name``
-This group represents a single line at the offset Y. The 'name' attribute
-allows to set the line name as represented by the 'gpio-line-names' property.
+**Attribute:** ``/config/gpio-sim/gpio-device/gpio-bankX/lineY/valid``
+
+This group represents a single line at the offset Y. The ``valid`` attribute
+indicates whether the line can be used as GPIO. The ``name`` attribute allows
+to set the line name as represented by the 'gpio-line-names' property.
**Item:** ``/config/gpio-sim/gpio-device/gpio-bankX/lineY/hog``
@@ -71,7 +74,7 @@ specific lines. The name of those subdirectories must take the form of:
``'line<offset>'`` (e.g. ``'line0'``, ``'line20'``, etc.) as the name will be
used by the module to assign the config to the specific line at given offset.
-Once the confiuration is complete, the ``'live'`` attribute must be set to 1 in
+Once the configuration is complete, the ``'live'`` attribute must be set to 1 in
order to instantiate the chip. It can be set back to 0 to destroy the simulated
chip. The module will synchronously wait for the new simulated device to be
successfully probed and if this doesn't happen, writing to ``'live'`` will
diff --git a/Documentation/admin-guide/gpio/gpio-virtuser.rst b/Documentation/admin-guide/gpio/gpio-virtuser.rst
new file mode 100644
index 000000000000..7e7c0df51640
--- /dev/null
+++ b/Documentation/admin-guide/gpio/gpio-virtuser.rst
@@ -0,0 +1,177 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+
+Virtual GPIO Consumer
+=====================
+
+The virtual GPIO Consumer module allows users to instantiate virtual devices
+that request GPIOs and then control their behavior over debugfs. Virtual
+consumer devices can be instantiated from device-tree or over configfs.
+
+A virtual consumer uses the driver-facing GPIO APIs and allows to cover it with
+automated tests driven by user-space. The GPIOs are requested using
+``gpiod_get_array()`` and so we support multiple GPIOs per connector ID.
+
+Creating GPIO consumers
+-----------------------
+
+The gpio-consumer module registers a configfs subsystem called
+``'gpio-virtuser'``. For details of the configfs filesystem, please refer to
+the configfs documentation.
+
+The user can create a hierarchy of configfs groups and items as well as modify
+values of exposed attributes. Once the consumer is instantiated, this hierarchy
+will be translated to appropriate device properties. The general structure is:
+
+**Group:** ``/config/gpio-virtuser``
+
+This is the top directory of the gpio-consumer configfs tree.
+
+**Group:** ``/config/gpio-consumer/example-name``
+
+**Attribute:** ``/config/gpio-consumer/example-name/live``
+
+**Attribute:** ``/config/gpio-consumer/example-name/dev_name``
+
+This is a directory representing a GPIO consumer device.
+
+The read-only ``dev_name`` attribute exposes the name of the device as it will
+appear in the system on the platform bus. This is useful for locating the
+associated debugfs directory under
+``/sys/kernel/debug/gpio-virtuser/$dev_name``.
+
+The ``'live'`` attribute allows to trigger the actual creation of the device
+once it's fully configured. The accepted values are: ``'1'`` to enable the
+virtual device and ``'0'`` to disable and tear it down.
+
+Creating GPIO lookup tables
+---------------------------
+
+Users can create a number of configfs groups under the device group:
+
+**Group:** ``/config/gpio-consumer/example-name/con_id``
+
+The ``'con_id'`` directory represents a single GPIO lookup and its value maps
+to the ``'con_id'`` argument of the ``gpiod_get()`` function. For example:
+``con_id`` == ``'reset'`` maps to the ``reset-gpios`` device property.
+
+Users can assign a number of GPIOs to each lookup. Each GPIO is a sub-directory
+with a user-defined name under the ``'con_id'`` group.
+
+**Attribute:** ``/config/gpio-consumer/example-name/con_id/0/key``
+
+**Attribute:** ``/config/gpio-consumer/example-name/con_id/0/offset``
+
+**Attribute:** ``/config/gpio-consumer/example-name/con_id/0/drive``
+
+**Attribute:** ``/config/gpio-consumer/example-name/con_id/0/pull``
+
+**Attribute:** ``/config/gpio-consumer/example-name/con_id/0/active_low``
+
+**Attribute:** ``/config/gpio-consumer/example-name/con_id/0/transitory``
+
+This is a group describing a single GPIO in the ``con_id-gpios`` property.
+
+For virtual consumers created using configfs we use machine lookup tables so
+this group can be considered as a mapping between the filesystem and the fields
+of a single entry in ``'struct gpiod_lookup'``.
+
+The ``'key'`` attribute represents either the name of the chip this GPIO
+belongs to or the GPIO line name. This depends on the value of the ``'offset'``
+attribute: if its value is >= 0, then ``'key'`` represents the label of the
+chip to lookup while ``'offset'`` represents the offset of the line in that
+chip. If ``'offset'`` is < 0, then ``'key'`` represents the name of the line.
+
+The remaining attributes map to the ``'flags'`` field of the GPIO lookup
+struct. The first two take string values as arguments:
+
+**``'drive'``:** ``'push-pull'``, ``'open-drain'``, ``'open-source'``
+**``'pull'``:** ``'pull-up'``, ``'pull-down'``, ``'pull-disabled'``, ``'as-is'``
+
+``'active_low'`` and ``'transitory'`` are boolean attributes.
+
+Activating GPIO consumers
+-------------------------
+
+Once the configuration is complete, the ``'live'`` attribute must be set to 1 in
+order to instantiate the consumer. It can be set back to 0 to destroy the
+virtual device. The module will synchronously wait for the new simulated device
+to be successfully probed and if this doesn't happen, writing to ``'live'`` will
+result in an error.
+
+Device-tree
+-----------
+
+Virtual GPIO consumers can also be defined in device-tree. The compatible string
+must be: ``"gpio-virtuser"`` with at least one property following the
+standardized GPIO pattern.
+
+An example device-tree code defining a virtual GPIO consumer:
+
+.. code-block :: none
+
+ gpio-virt-consumer {
+ compatible = "gpio-virtuser";
+
+ foo-gpios = <&gpio0 5 GPIO_ACTIVE_LOW>, <&gpio1 2 0>;
+ bar-gpios = <&gpio0 6 0>;
+ };
+
+Controlling virtual GPIO consumers
+----------------------------------
+
+Once active, the device will export debugfs attributes for controlling GPIO
+arrays as well as each requested GPIO line separately. Let's consider the
+following device property: ``foo-gpios = <&gpio0 0 0>, <&gpio0 4 0>;``.
+
+The following debugfs attribute groups will be created:
+
+**Group:** ``/sys/kernel/debug/gpio-virtuser/$dev_name/gpiod:foo/``
+
+This is the group that will contain the attributes for the entire GPIO array.
+
+**Attribute:** ``/sys/kernel/debug/gpio-virtuser/$dev_name/gpiod:foo/values``
+
+**Attribute:** ``/sys/kernel/debug/gpio-virtuser/$dev_name/gpiod:foo/values_atomic``
+
+Both attributes allow to read and set arrays of GPIO values. User must pass
+exactly the number of values that the array contains in the form of a string
+containing zeroes and ones representing inactive and active GPIO states
+respectively. In this example: ``echo 11 > values``.
+
+The ``values_atomic`` attribute works the same as ``values`` but the kernel
+will execute the GPIO driver callbacks in interrupt context.
+
+**Group:** ``/sys/kernel/debug/gpio-virtuser/$dev_name/gpiod:foo:$index/``
+
+This is a group that represents a single GPIO with ``$index`` being its offset
+in the array.
+
+**Attribute:** ``/sys/kernel/debug/gpio-virtuser/$dev_name/gpiod:foo:$index/consumer``
+
+Allows to set and read the consumer label of the GPIO line.
+
+**Attribute:** ``/sys/kernel/debug/gpio-virtuser/$dev_name/gpiod:foo:$index/debounce``
+
+Allows to set and read the debounce period of the GPIO line.
+
+**Attribute:** ``/sys/kernel/debug/gpio-virtuser/$dev_name/gpiod:foo:$index/direction``
+
+**Attribute:** ``/sys/kernel/debug/gpio-virtuser/$dev_name/gpiod:foo:$index/direction_atomic``
+
+These two attributes allow to set the direction of the GPIO line. They accept
+"input" and "output" as values. The atomic variant executes the driver callback
+in interrupt context.
+
+**Attribute:** ``/sys/kernel/debug/gpio-virtuser/$dev_name/gpiod:foo:$index/interrupts``
+
+If the line is requested in input mode, writing ``1`` to this attribute will
+make the module listen for edge interrupts on the GPIO. Writing ``0`` disables
+the monitoring. Reading this attribute returns the current number of registered
+interrupts (both edges).
+
+**Attribute:** ``/sys/kernel/debug/gpio-virtuser/$dev_name/gpiod:foo:$index/value``
+
+**Attribute:** ``/sys/kernel/debug/gpio-virtuser/$dev_name/gpiod:foo:$index/value_atomic``
+
+Both attributes allow to read and set values of individual requested GPIO lines.
+They accept the following values: ``1`` and ``0``.
diff --git a/Documentation/admin-guide/gpio/index.rst b/Documentation/admin-guide/gpio/index.rst
index f6861ca16ffe..712f379731cb 100644
--- a/Documentation/admin-guide/gpio/index.rst
+++ b/Documentation/admin-guide/gpio/index.rst
@@ -1,16 +1,17 @@
.. SPDX-License-Identifier: GPL-2.0
====
-gpio
+GPIO
====
.. toctree::
:maxdepth: 1
+ Character Device Userspace API <../../userspace-api/gpio/chardev>
gpio-aggregator
- sysfs
- gpio-mockup
gpio-sim
+ gpio-virtuser
+ Obsolete APIs <obsolete>
.. only:: subproject and html
diff --git a/Documentation/admin-guide/gpio/obsolete.rst b/Documentation/admin-guide/gpio/obsolete.rst
new file mode 100644
index 000000000000..5adbff02d61f
--- /dev/null
+++ b/Documentation/admin-guide/gpio/obsolete.rst
@@ -0,0 +1,13 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==================
+Obsolete GPIO APIs
+==================
+
+.. toctree::
+ :maxdepth: 1
+
+ Character Device Userspace API (v1) <../../userspace-api/gpio/chardev_v1>
+ Sysfs Interface <../../userspace-api/gpio/sysfs>
+ Mockup Testing Module <gpio-mockup>
+
diff --git a/Documentation/admin-guide/highuid.rst b/Documentation/admin-guide/highuid.rst
deleted file mode 100644
index 6ee70465c0ea..000000000000
--- a/Documentation/admin-guide/highuid.rst
+++ /dev/null
@@ -1,80 +0,0 @@
-===================================================
-Notes on the change from 16-bit UIDs to 32-bit UIDs
-===================================================
-
-:Author: Chris Wing <wingc@umich.edu>
-:Last updated: January 11, 2000
-
-- kernel code MUST take into account __kernel_uid_t and __kernel_uid32_t
- when communicating between user and kernel space in an ioctl or data
- structure.
-
-- kernel code should use uid_t and gid_t in kernel-private structures and
- code.
-
-What's left to be done for 32-bit UIDs on all Linux architectures:
-
-- Disk quotas have an interesting limitation that is not related to the
- maximum UID/GID. They are limited by the maximum file size on the
- underlying filesystem, because quota records are written at offsets
- corresponding to the UID in question.
- Further investigation is needed to see if the quota system can cope
- properly with huge UIDs. If it can deal with 64-bit file offsets on all
- architectures, this should not be a problem.
-
-- Decide whether or not to keep backwards compatibility with the system
- accounting file, or if we should break it as the comments suggest
- (currently, the old 16-bit UID and GID are still written to disk, and
- part of the former pad space is used to store separate 32-bit UID and
- GID)
-
-- Need to validate that OS emulation calls the 16-bit UID
- compatibility syscalls, if the OS being emulated used 16-bit UIDs, or
- uses the 32-bit UID system calls properly otherwise.
-
- This affects at least:
-
- - iBCS on Intel
-
- - sparc32 emulation on sparc64
- (need to support whatever new 32-bit UID system calls are added to
- sparc32)
-
-- Validate that all filesystems behave properly.
-
- At present, 32-bit UIDs _should_ work for:
-
- - ext2
- - ufs
- - isofs
- - nfs
- - coda
- - udf
-
- Ioctl() fixups have been made for:
-
- - ncpfs
- - smbfs
-
- Filesystems with simple fixups to prevent 16-bit UID wraparound:
-
- - minix
- - sysv
- - qnx4
-
- Other filesystems have not been checked yet.
-
-- The ncpfs and smpfs filesystems cannot presently use 32-bit UIDs in
- all ioctl()s. Some new ioctl()s have been added with 32-bit UIDs, but
- more are needed. (as well as new user<->kernel data structures)
-
-- The ELF core dump format only supports 16-bit UIDs on arm, i386, m68k,
- sh, and sparc32. Fixing this is probably not that important, but would
- require adding a new ELF section.
-
-- The ioctl()s used to control the in-kernel NFS server only support
- 16-bit UIDs on arm, i386, m68k, sh, and sparc32.
-
-- make sure that the UID mapping feature of AX25 networking works properly
- (it should be safe because it's always used a 32-bit integer to
- communicate between user and kernel)
diff --git a/Documentation/admin-guide/hw-vuln/attack_vector_controls.rst b/Documentation/admin-guide/hw-vuln/attack_vector_controls.rst
new file mode 100644
index 000000000000..d0bdbd81dcf9
--- /dev/null
+++ b/Documentation/admin-guide/hw-vuln/attack_vector_controls.rst
@@ -0,0 +1,236 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Attack Vector Controls
+======================
+
+Attack vector controls provide a simple method to configure only the mitigations
+for CPU vulnerabilities which are relevant given the intended use of a system.
+Administrators are encouraged to consider which attack vectors are relevant and
+disable all others in order to recoup system performance.
+
+When new relevant CPU vulnerabilities are found, they will be added to these
+attack vector controls so administrators will likely not need to reconfigure
+their command line parameters as mitigations will continue to be correctly
+applied based on the chosen attack vector controls.
+
+Attack Vectors
+--------------
+
+There are 5 sets of attack-vector mitigations currently supported by the kernel:
+
+#. :ref:`user_kernel`
+#. :ref:`user_user`
+#. :ref:`guest_host`
+#. :ref:`guest_guest`
+#. :ref:`smt`
+
+To control the enabled attack vectors, see :ref:`cmdline`.
+
+.. _user_kernel:
+
+User-to-Kernel
+^^^^^^^^^^^^^^
+
+The user-to-kernel attack vector involves a malicious userspace program
+attempting to leak kernel data into userspace by exploiting a CPU vulnerability.
+The kernel data involved might be limited to certain kernel memory, or include
+all memory in the system, depending on the vulnerability exploited.
+
+If no untrusted userspace applications are being run, such as with single-user
+systems, consider disabling user-to-kernel mitigations.
+
+Note that the CPU vulnerabilities mitigated by Linux have generally not been
+shown to be exploitable from browser-based sandboxes. User-to-kernel
+mitigations are therefore mostly relevant if unknown userspace applications may
+be run by untrusted users.
+
+*user-to-kernel mitigations are enabled by default*
+
+.. _user_user:
+
+User-to-User
+^^^^^^^^^^^^
+
+The user-to-user attack vector involves a malicious userspace program attempting
+to influence the behavior of another unsuspecting userspace program in order to
+exfiltrate data. The vulnerability of a userspace program is based on the
+program itself and the interfaces it provides.
+
+If no untrusted userspace applications are being run, consider disabling
+user-to-user mitigations.
+
+Note that because the Linux kernel contains a mapping of all physical memory,
+preventing a malicious userspace program from leaking data from another
+userspace program requires mitigating user-to-kernel attacks as well for
+complete protection.
+
+*user-to-user mitigations are enabled by default*
+
+.. _guest_host:
+
+Guest-to-Host
+^^^^^^^^^^^^^
+
+The guest-to-host attack vector involves a malicious VM attempting to leak
+hypervisor data into the VM. The data involved may be limited, or may
+potentially include all memory in the system, depending on the vulnerability
+exploited.
+
+If no untrusted VMs are being run, consider disabling guest-to-host mitigations.
+
+*guest-to-host mitigations are enabled by default if KVM support is present*
+
+.. _guest_guest:
+
+Guest-to-Guest
+^^^^^^^^^^^^^^
+
+The guest-to-guest attack vector involves a malicious VM attempting to influence
+the behavior of another unsuspecting VM in order to exfiltrate data. The
+vulnerability of a VM is based on the code inside the VM itself and the
+interfaces it provides.
+
+If no untrusted VMs, or only a single VM is being run, consider disabling
+guest-to-guest mitigations.
+
+Similar to the user-to-user attack vector, preventing a malicious VM from
+leaking data from another VM requires mitigating guest-to-host attacks as well
+due to the Linux kernel phys map.
+
+*guest-to-guest mitigations are enabled by default if KVM support is present*
+
+.. _smt:
+
+Cross-Thread
+^^^^^^^^^^^^
+
+The cross-thread attack vector involves a malicious userspace program or
+malicious VM either observing or attempting to influence the behavior of code
+running on the SMT sibling thread in order to exfiltrate data.
+
+Many cross-thread attacks can only be mitigated if SMT is disabled, which will
+result in reduced CPU core count and reduced performance.
+
+If cross-thread mitigations are fully enabled ('auto,nosmt'), all mitigations
+for cross-thread attacks will be enabled. SMT may be disabled depending on
+which vulnerabilities are present in the CPU.
+
+If cross-thread mitigations are partially enabled ('auto'), mitigations for
+cross-thread attacks will be enabled but SMT will not be disabled.
+
+If cross-thread mitigations are disabled, no mitigations for cross-thread
+attacks will be enabled.
+
+Cross-thread mitigation may not be required if core-scheduling or similar
+techniques are used to prevent untrusted workloads from running on SMT siblings.
+
+*cross-thread mitigations default to partially enabled*
+
+.. _cmdline:
+
+Command Line Controls
+---------------------
+
+Attack vectors are controlled through the mitigations= command line option. The
+value provided begins with a global option and then may optionally include one
+or more options to disable various attack vectors.
+
+Format:
+ | ``mitigations=[global]``
+ | ``mitigations=[global],[attack vectors]``
+
+Global options:
+
+============ =============================================================
+Option Description
+============ =============================================================
+'off' All attack vectors disabled.
+'auto' All attack vectors enabled, partial cross-thread mitigations.
+'auto,nosmt' All attack vectors enabled, full cross-thread mitigations.
+============ =============================================================
+
+Attack vector options:
+
+================= =======================================
+Option Description
+================= =======================================
+'no_user_kernel' Disables user-to-kernel mitigations.
+'no_user_user' Disables user-to-user mitigations.
+'no_guest_host' Disables guest-to-host mitigations.
+'no_guest_guest' Disables guest-to-guest mitigations
+'no_cross_thread' Disables all cross-thread mitigations.
+================= =======================================
+
+Multiple attack vector options may be specified in a comma-separated list. If
+the global option is not specified, it defaults to 'auto'. The global option
+'off' is equivalent to disabling all attack vectors.
+
+Examples:
+ | ``mitigations=auto,no_user_kernel``
+
+ Enable all attack vectors except user-to-kernel. Partial cross-thread
+ mitigations.
+
+ | ``mitigations=auto,nosmt,no_guest_host,no_guest_guest``
+
+ Enable all attack vectors and cross-thread mitigations except for
+ guest-to-host and guest-to-guest mitigations.
+
+ | ``mitigations=,no_cross_thread``
+
+ Enable all attack vectors but not cross-thread mitigations.
+
+Interactions with command-line options
+--------------------------------------
+
+Vulnerability-specific controls (e.g. "retbleed=off") take precedence over all
+attack vector controls. Mitigations for individual vulnerabilities may be
+turned on or off via their command-line options regardless of the attack vector
+controls.
+
+Summary of attack-vector mitigations
+------------------------------------
+
+When a vulnerability is mitigated due to an attack-vector control, the default
+mitigation option for that particular vulnerability is used. To use a different
+mitigation, please use the vulnerability-specific command line option.
+
+The table below summarizes which vulnerabilities are mitigated when different
+attack vectors are enabled and assuming the CPU is vulnerable.
+
+=============== ============== ============ ============= ============== ============ ========
+Vulnerability User-to-Kernel User-to-User Guest-to-Host Guest-to-Guest Cross-Thread Notes
+=============== ============== ============ ============= ============== ============ ========
+BHI X X
+ITS X X
+GDS X X X X * (Note 1)
+L1TF X X * (Note 2)
+MDS X X X X * (Note 2)
+MMIO X X X X * (Note 2)
+Meltdown X
+Retbleed X X * (Note 3)
+RFDS X X X X
+Spectre_v1 X
+Spectre_v2 X X
+Spectre_v2_user X X * (Note 1)
+SRBDS X X X X
+SRSO X X X X
+SSB X
+TAA X X X X * (Note 2)
+TSA X X X X
+VMSCAPE X
+=============== ============== ============ ============= ============== ============ ========
+
+Notes:
+ 1 -- Can be mitigated without disabling SMT.
+
+ 2 -- Disables SMT if cross-thread mitigations are fully enabled and the CPU
+ is vulnerable
+
+ 3 -- Disables SMT if cross-thread mitigations are fully enabled, the CPU is
+ vulnerable, and STIBP is not supported
+
+When an attack-vector is disabled, all mitigations for the vulnerabilities
+listed in the above table are disabled, unless mitigation is required for a
+different enabled attack-vector or a mitigation is explicitly selected via a
+vulnerability-specific command line option.
diff --git a/Documentation/admin-guide/hw-vuln/core-scheduling.rst b/Documentation/admin-guide/hw-vuln/core-scheduling.rst
index cf1eeefdfc32..a92e10ec402e 100644
--- a/Documentation/admin-guide/hw-vuln/core-scheduling.rst
+++ b/Documentation/admin-guide/hw-vuln/core-scheduling.rst
@@ -67,8 +67,8 @@ arg4:
will be performed for all tasks in the task group of ``pid``.
arg5:
- userspace pointer to an unsigned long for storing the cookie returned by
- ``PR_SCHED_CORE_GET`` command. Should be 0 for all other commands.
+ userspace pointer to an unsigned long long for storing the cookie returned
+ by ``PR_SCHED_CORE_GET`` command. Should be 0 for all other commands.
In order for a process to push a cookie to, or pull a cookie from a process, it
is required to have the ptrace access mode: `PTRACE_MODE_READ_REALCREDS` to the
diff --git a/Documentation/admin-guide/hw-vuln/index.rst b/Documentation/admin-guide/hw-vuln/index.rst
index de99caabf65a..55d747511f83 100644
--- a/Documentation/admin-guide/hw-vuln/index.rst
+++ b/Documentation/admin-guide/hw-vuln/index.rst
@@ -9,6 +9,7 @@ are configurable at compile, boot or run time.
.. toctree::
:maxdepth: 1
+ attack_vector_controls
spectre
l1tf
mds
@@ -21,3 +22,8 @@ are configurable at compile, boot or run time.
cross-thread-rsb
srso
gather_data_sampling
+ reg-file-data-sampling
+ rsb
+ old_microcode
+ indirect-target-selection
+ vmscape
diff --git a/Documentation/admin-guide/hw-vuln/indirect-target-selection.rst b/Documentation/admin-guide/hw-vuln/indirect-target-selection.rst
new file mode 100644
index 000000000000..d9ca64108d23
--- /dev/null
+++ b/Documentation/admin-guide/hw-vuln/indirect-target-selection.rst
@@ -0,0 +1,168 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Indirect Target Selection (ITS)
+===============================
+
+ITS is a vulnerability in some Intel CPUs that support Enhanced IBRS and were
+released before Alder Lake. ITS may allow an attacker to control the prediction
+of indirect branches and RETs located in the lower half of a cacheline.
+
+ITS is assigned CVE-2024-28956 with a CVSS score of 4.7 (Medium).
+
+Scope of Impact
+---------------
+- **eIBRS Guest/Host Isolation**: Indirect branches in KVM/kernel may still be
+ predicted with unintended target corresponding to a branch in the guest.
+
+- **Intra-Mode BTI**: In-kernel training such as through cBPF or other native
+ gadgets.
+
+- **Indirect Branch Prediction Barrier (IBPB)**: After an IBPB, indirect
+ branches may still be predicted with targets corresponding to direct branches
+ executed prior to the IBPB. This is fixed by the IPU 2025.1 microcode, which
+ should be available via distro updates. Alternatively microcode can be
+ obtained from Intel's github repository [#f1]_.
+
+Affected CPUs
+-------------
+Below is the list of ITS affected CPUs [#f2]_ [#f3]_:
+
+ ======================== ============ ==================== ===============
+ Common name Family_Model eIBRS Intra-mode BTI
+ Guest/Host Isolation
+ ======================== ============ ==================== ===============
+ SKYLAKE_X (step >= 6) 06_55H Affected Affected
+ ICELAKE_X 06_6AH Not affected Affected
+ ICELAKE_D 06_6CH Not affected Affected
+ ICELAKE_L 06_7EH Not affected Affected
+ TIGERLAKE_L 06_8CH Not affected Affected
+ TIGERLAKE 06_8DH Not affected Affected
+ KABYLAKE_L (step >= 12) 06_8EH Affected Affected
+ KABYLAKE (step >= 13) 06_9EH Affected Affected
+ COMETLAKE 06_A5H Affected Affected
+ COMETLAKE_L 06_A6H Affected Affected
+ ROCKETLAKE 06_A7H Not affected Affected
+ ======================== ============ ==================== ===============
+
+- All affected CPUs enumerate Enhanced IBRS feature.
+- IBPB isolation is affected on all ITS affected CPUs, and need a microcode
+ update for mitigation.
+- None of the affected CPUs enumerate BHI_CTRL which was introduced in Golden
+ Cove (Alder Lake and Sapphire Rapids). This can help guests to determine the
+ host's affected status.
+- Intel Atom CPUs are not affected by ITS.
+
+Mitigation
+----------
+As only the indirect branches and RETs that have their last byte of instruction
+in the lower half of the cacheline are vulnerable to ITS, the basic idea behind
+the mitigation is to not allow indirect branches in the lower half.
+
+This is achieved by relying on existing retpoline support in the kernel, and in
+compilers. ITS-vulnerable retpoline sites are runtime patched to point to newly
+added ITS-safe thunks. These safe thunks consists of indirect branch in the
+second half of the cacheline. Not all retpoline sites are patched to thunks, if
+a retpoline site is evaluated to be ITS-safe, it is replaced with an inline
+indirect branch.
+
+Dynamic thunks
+~~~~~~~~~~~~~~
+From a dynamically allocated pool of safe-thunks, each vulnerable site is
+replaced with a new thunk, such that they get a unique address. This could
+improve the branch prediction accuracy. Also, it is a defense-in-depth measure
+against aliasing.
+
+Note, for simplicity, indirect branches in eBPF programs are always replaced
+with a jump to a static thunk in __x86_indirect_its_thunk_array. If required,
+in future this can be changed to use dynamic thunks.
+
+All vulnerable RETs are replaced with a static thunk, they do not use dynamic
+thunks. This is because RETs get their prediction from RSB mostly that does not
+depend on source address. RETs that underflow RSB may benefit from dynamic
+thunks. But, RETs significantly outnumber indirect branches, and any benefit
+from a unique source address could be outweighed by the increased icache
+footprint and iTLB pressure.
+
+Retpoline
+~~~~~~~~~
+Retpoline sequence also mitigates ITS-unsafe indirect branches. For this
+reason, when retpoline is enabled, ITS mitigation only relocates the RETs to
+safe thunks. Unless user requested the RSB-stuffing mitigation.
+
+RSB Stuffing
+~~~~~~~~~~~~
+RSB-stuffing via Call Depth Tracking is a mitigation for Retbleed RSB-underflow
+attacks. And it also mitigates RETs that are vulnerable to ITS.
+
+Mitigation in guests
+^^^^^^^^^^^^^^^^^^^^
+All guests deploy ITS mitigation by default, irrespective of eIBRS enumeration
+and Family/Model of the guest. This is because eIBRS feature could be hidden
+from a guest. One exception to this is when a guest enumerates BHI_DIS_S, which
+indicates that the guest is running on an unaffected host.
+
+To prevent guests from unnecessarily deploying the mitigation on unaffected
+platforms, Intel has defined ITS_NO bit(62) in MSR IA32_ARCH_CAPABILITIES. When
+a guest sees this bit set, it should not enumerate the ITS bug. Note, this bit
+is not set by any hardware, but is **intended for VMMs to synthesize** it for
+guests as per the host's affected status.
+
+Mitigation options
+^^^^^^^^^^^^^^^^^^
+The ITS mitigation can be controlled using the "indirect_target_selection"
+kernel parameter. The available options are:
+
+ ======== ===================================================================
+ on (default) Deploy the "Aligned branch/return thunks" mitigation.
+ If spectre_v2 mitigation enables retpoline, aligned-thunks are only
+ deployed for the affected RET instructions. Retpoline mitigates
+ indirect branches.
+
+ off Disable ITS mitigation.
+
+ vmexit Equivalent to "=on" if the CPU is affected by guest/host isolation
+ part of ITS. Otherwise, mitigation is not deployed. This option is
+ useful when host userspace is not in the threat model, and only
+ attacks from guest to host are considered.
+
+ stuff Deploy RSB-fill mitigation when retpoline is also deployed.
+ Otherwise, deploy the default mitigation. When retpoline mitigation
+ is enabled, RSB-stuffing via Call-Depth-Tracking also mitigates
+ ITS.
+
+ force Force the ITS bug and deploy the default mitigation.
+ ======== ===================================================================
+
+Sysfs reporting
+---------------
+
+The sysfs file showing ITS mitigation status is:
+
+ /sys/devices/system/cpu/vulnerabilities/indirect_target_selection
+
+Note, microcode mitigation status is not reported in this file.
+
+The possible values in this file are:
+
+.. list-table::
+
+ * - Not affected
+ - The processor is not vulnerable.
+ * - Vulnerable
+ - System is vulnerable and no mitigation has been applied.
+ * - Vulnerable, KVM: Not affected
+ - System is vulnerable to intra-mode BTI, but not affected by eIBRS
+ guest/host isolation.
+ * - Mitigation: Aligned branch/return thunks
+ - The mitigation is enabled, affected indirect branches and RETs are
+ relocated to safe thunks.
+ * - Mitigation: Retpolines, Stuffing RSB
+ - The mitigation is enabled using retpoline and RSB stuffing.
+
+References
+----------
+.. [#f1] Microcode repository - https://github.com/intel/Intel-Linux-Processor-Microcode-Data-Files
+
+.. [#f2] Affected Processors list - https://www.intel.com/content/www/us/en/developer/topic-technology/software-security-guidance/processors-affected-consolidated-product-cpu-model.html
+
+.. [#f3] Affected Processors list (machine readable) - https://github.com/intel/Intel-affected-processor-list
diff --git a/Documentation/admin-guide/hw-vuln/l1d_flush.rst b/Documentation/admin-guide/hw-vuln/l1d_flush.rst
index 210020bc3f56..35dc25159b28 100644
--- a/Documentation/admin-guide/hw-vuln/l1d_flush.rst
+++ b/Documentation/admin-guide/hw-vuln/l1d_flush.rst
@@ -31,7 +31,7 @@ specifically opt into the feature to enable it.
Mitigation
----------
-When PR_SET_L1D_FLUSH is enabled for a task a flush of the L1D cache is
+When PR_SPEC_L1D_FLUSH is enabled for a task a flush of the L1D cache is
performed when the task is scheduled out and the incoming task belongs to a
different process and therefore to a different address space.
diff --git a/Documentation/admin-guide/hw-vuln/mds.rst b/Documentation/admin-guide/hw-vuln/mds.rst
index 48c7b0b72aed..754679db0ce8 100644
--- a/Documentation/admin-guide/hw-vuln/mds.rst
+++ b/Documentation/admin-guide/hw-vuln/mds.rst
@@ -214,7 +214,7 @@ XEON PHI specific considerations
command line with the 'ring3mwait=disable' command line option.
XEON PHI is not affected by the other MDS variants and MSBDS is mitigated
- before the CPU enters a idle state. As XEON PHI is not affected by L1TF
+ before the CPU enters an idle state. As XEON PHI is not affected by L1TF
either disabling SMT is not required for full protection.
.. _mds_smt_control:
diff --git a/Documentation/admin-guide/hw-vuln/old_microcode.rst b/Documentation/admin-guide/hw-vuln/old_microcode.rst
new file mode 100644
index 000000000000..6ded8f86b8d0
--- /dev/null
+++ b/Documentation/admin-guide/hw-vuln/old_microcode.rst
@@ -0,0 +1,21 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=============
+Old Microcode
+=============
+
+The kernel keeps a table of released microcode. Systems that had
+microcode older than this at boot will say "Vulnerable". This means
+that the system was vulnerable to some known CPU issue. It could be
+security or functional, the kernel does not know or care.
+
+You should update the CPU microcode to mitigate any exposure. This is
+usually accomplished by updating the files in
+/lib/firmware/intel-ucode/ via normal distribution updates. Intel also
+distributes these files in a github repo:
+
+ https://github.com/intel/Intel-Linux-Processor-Microcode-Data-Files.git
+
+Just like all the other hardware vulnerabilities, exposure is
+determined at boot. Runtime microcode updates do not change the status
+of this vulnerability.
diff --git a/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst b/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst
index 1302fd1b55e8..6dba18dbb9ab 100644
--- a/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst
+++ b/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst
@@ -157,9 +157,7 @@ This is achieved by using the otherwise unused and obsolete VERW instruction in
combination with a microcode update. The microcode clears the affected CPU
buffers when the VERW instruction is executed.
-Kernel reuses the MDS function to invoke the buffer clearing:
-
- mds_clear_cpu_buffers()
+Kernel does the buffer clearing with x86_clear_cpu_buffers().
On MDS affected CPUs, the kernel already invokes CPU buffer clear on
kernel/userspace, hypervisor/guest and C-state (idle) transitions. No
diff --git a/Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst b/Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst
new file mode 100644
index 000000000000..ad15417d39f9
--- /dev/null
+++ b/Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst
@@ -0,0 +1,96 @@
+==================================
+Register File Data Sampling (RFDS)
+==================================
+
+Register File Data Sampling (RFDS) is a microarchitectural vulnerability that
+only affects Intel Atom parts(also branded as E-cores). RFDS may allow
+a malicious actor to infer data values previously used in floating point
+registers, vector registers, or integer registers. RFDS does not provide the
+ability to choose which data is inferred. CVE-2023-28746 is assigned to RFDS.
+
+Affected Processors
+===================
+Below is the list of affected Intel processors [#f1]_:
+
+ =================== ============
+ Common name Family_Model
+ =================== ============
+ ATOM_GOLDMONT 06_5CH
+ ATOM_GOLDMONT_D 06_5FH
+ ATOM_GOLDMONT_PLUS 06_7AH
+ ATOM_TREMONT_D 06_86H
+ ATOM_TREMONT 06_96H
+ ALDERLAKE 06_97H
+ ALDERLAKE_L 06_9AH
+ ATOM_TREMONT_L 06_9CH
+ RAPTORLAKE 06_B7H
+ RAPTORLAKE_P 06_BAH
+ ATOM_GRACEMONT 06_BEH
+ RAPTORLAKE_S 06_BFH
+ =================== ============
+
+Mitigation
+==========
+Intel released a microcode update that enables software to clear sensitive
+information using the VERW instruction. Like MDS, RFDS deploys the same
+mitigation strategy to force the CPU to clear the affected buffers before an
+attacker can extract the secrets. This is achieved by using the otherwise
+unused and obsolete VERW instruction in combination with a microcode update.
+The microcode clears the affected CPU buffers when the VERW instruction is
+executed.
+
+Mitigation points
+-----------------
+VERW is executed by the kernel before returning to user space, and by KVM
+before VMentry. None of the affected cores support SMT, so VERW is not required
+at C-state transitions.
+
+New bits in IA32_ARCH_CAPABILITIES
+----------------------------------
+Newer processors and microcode update on existing affected processors added new
+bits to IA32_ARCH_CAPABILITIES MSR. These bits can be used to enumerate
+vulnerability and mitigation capability:
+
+- Bit 27 - RFDS_NO - When set, processor is not affected by RFDS.
+- Bit 28 - RFDS_CLEAR - When set, processor is affected by RFDS, and has the
+ microcode that clears the affected buffers on VERW execution.
+
+Mitigation control on the kernel command line
+---------------------------------------------
+The kernel command line allows to control RFDS mitigation at boot time with the
+parameter "reg_file_data_sampling=". The valid arguments are:
+
+ ========== =================================================================
+ on If the CPU is vulnerable, enable mitigation; CPU buffer clearing
+ on exit to userspace and before entering a VM.
+ off Disables mitigation.
+ ========== =================================================================
+
+Mitigation default is selected by CONFIG_MITIGATION_RFDS.
+
+Mitigation status information
+-----------------------------
+The Linux kernel provides a sysfs interface to enumerate the current
+vulnerability status of the system: whether the system is vulnerable, and
+which mitigations are active. The relevant sysfs file is:
+
+ /sys/devices/system/cpu/vulnerabilities/reg_file_data_sampling
+
+The possible values in this file are:
+
+ .. list-table::
+
+ * - 'Not affected'
+ - The processor is not vulnerable
+ * - 'Vulnerable'
+ - The processor is vulnerable, but no mitigation enabled
+ * - 'Vulnerable: No microcode'
+ - The processor is vulnerable but microcode is not updated.
+ * - 'Mitigation: Clear Register File'
+ - The processor is vulnerable and the CPU buffer clearing mitigation is
+ enabled.
+
+References
+----------
+.. [#f1] Affected Processors
+ https://www.intel.com/content/www/us/en/developer/topic-technology/software-security-guidance/processors-affected-consolidated-product-cpu-model.html
diff --git a/Documentation/admin-guide/hw-vuln/rsb.rst b/Documentation/admin-guide/hw-vuln/rsb.rst
new file mode 100644
index 000000000000..21dbf9cf25f8
--- /dev/null
+++ b/Documentation/admin-guide/hw-vuln/rsb.rst
@@ -0,0 +1,268 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=======================
+RSB-related mitigations
+=======================
+
+.. warning::
+ Please keep this document up-to-date, otherwise you will be
+ volunteered to update it and convert it to a very long comment in
+ bugs.c!
+
+Since 2018 there have been many Spectre CVEs related to the Return Stack
+Buffer (RSB) (sometimes referred to as the Return Address Stack (RAS) or
+Return Address Predictor (RAP) on AMD).
+
+Information about these CVEs and how to mitigate them is scattered
+amongst a myriad of microarchitecture-specific documents.
+
+This document attempts to consolidate all the relevant information in
+once place and clarify the reasoning behind the current RSB-related
+mitigations. It's meant to be as concise as possible, focused only on
+the current kernel mitigations: what are the RSB-related attack vectors
+and how are they currently being mitigated?
+
+It's *not* meant to describe how the RSB mechanism operates or how the
+exploits work. More details about those can be found in the references
+below.
+
+Rather, this is basically a glorified comment, but too long to actually
+be one. So when the next CVE comes along, a kernel developer can
+quickly refer to this as a refresher to see what we're actually doing
+and why.
+
+At a high level, there are two classes of RSB attacks: RSB poisoning
+(Intel and AMD) and RSB underflow (Intel only). They must each be
+considered individually for each attack vector (and microarchitecture
+where applicable).
+
+----
+
+RSB poisoning (Intel and AMD)
+=============================
+
+SpectreRSB
+~~~~~~~~~~
+
+RSB poisoning is a technique used by SpectreRSB [#spectre-rsb]_ where
+an attacker poisons an RSB entry to cause a victim's return instruction
+to speculate to an attacker-controlled address. This can happen when
+there are unbalanced CALLs/RETs after a context switch or VMEXIT.
+
+* All attack vectors can potentially be mitigated by flushing out any
+ poisoned RSB entries using an RSB filling sequence
+ [#intel-rsb-filling]_ [#amd-rsb-filling]_ when transitioning between
+ untrusted and trusted domains. But this has a performance impact and
+ should be avoided whenever possible.
+
+ .. DANGER::
+ **FIXME**: Currently we're flushing 32 entries. However, some CPU
+ models have more than 32 entries. The loop count needs to be
+ increased for those. More detailed information is needed about RSB
+ sizes.
+
+* On context switch, the user->user mitigation requires ensuring the
+ RSB gets filled or cleared whenever IBPB gets written [#cond-ibpb]_
+ during a context switch:
+
+ * AMD:
+ On Zen 4+, IBPB (or SBPB [#amd-sbpb]_ if used) clears the RSB.
+ This is indicated by IBPB_RET in CPUID [#amd-ibpb-rsb]_.
+
+ On Zen < 4, the RSB filling sequence [#amd-rsb-filling]_ must be
+ always be done in addition to IBPB [#amd-ibpb-no-rsb]_. This is
+ indicated by X86_BUG_IBPB_NO_RET.
+
+ * Intel:
+ IBPB always clears the RSB:
+
+ "Software that executed before the IBPB command cannot control
+ the predicted targets of indirect branches executed after the
+ command on the same logical processor. The term indirect branch
+ in this context includes near return instructions, so these
+ predicted targets may come from the RSB." [#intel-ibpb-rsb]_
+
+* On context switch, user->kernel attacks are prevented by SMEP. User
+ space can only insert user space addresses into the RSB. Even
+ non-canonical addresses can't be inserted due to the page gap at the
+ end of the user canonical address space reserved by TASK_SIZE_MAX.
+ A SMEP #PF at instruction fetch prevents the kernel from speculatively
+ executing user space.
+
+ * AMD:
+ "Finally, branches that are predicted as 'ret' instructions get
+ their predicted targets from the Return Address Predictor (RAP).
+ AMD recommends software use a RAP stuffing sequence (mitigation
+ V2-3 in [2]) and/or Supervisor Mode Execution Protection (SMEP)
+ to ensure that the addresses in the RAP are safe for
+ speculation. Collectively, we refer to these mitigations as "RAP
+ Protection"." [#amd-smep-rsb]_
+
+ * Intel:
+ "On processors with enhanced IBRS, an RSB overwrite sequence may
+ not suffice to prevent the predicted target of a near return
+ from using an RSB entry created in a less privileged predictor
+ mode. Software can prevent this by enabling SMEP (for
+ transitions from user mode to supervisor mode) and by having
+ IA32_SPEC_CTRL.IBRS set during VM exits." [#intel-smep-rsb]_
+
+* On VMEXIT, guest->host attacks are mitigated by eIBRS (and PBRSB
+ mitigation if needed):
+
+ * AMD:
+ "When Automatic IBRS is enabled, the internal return address
+ stack used for return address predictions is cleared on VMEXIT."
+ [#amd-eibrs-vmexit]_
+
+ * Intel:
+ "On processors with enhanced IBRS, an RSB overwrite sequence may
+ not suffice to prevent the predicted target of a near return
+ from using an RSB entry created in a less privileged predictor
+ mode. Software can prevent this by enabling SMEP (for
+ transitions from user mode to supervisor mode) and by having
+ IA32_SPEC_CTRL.IBRS set during VM exits. Processors with
+ enhanced IBRS still support the usage model where IBRS is set
+ only in the OS/VMM for OSes that enable SMEP. To do this, such
+ processors will ensure that guest behavior cannot control the
+ RSB after a VM exit once IBRS is set, even if IBRS was not set
+ at the time of the VM exit." [#intel-eibrs-vmexit]_
+
+ Note that some Intel CPUs are susceptible to Post-barrier Return
+ Stack Buffer Predictions (PBRSB) [#intel-pbrsb]_, where the last
+ CALL from the guest can be used to predict the first unbalanced RET.
+ In this case the PBRSB mitigation is needed in addition to eIBRS.
+
+AMD RETBleed / SRSO / Branch Type Confusion
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+On AMD, poisoned RSB entries can also be created by the AMD RETBleed
+variant [#retbleed-paper]_ [#amd-btc]_ or by Speculative Return Stack
+Overflow [#amd-srso]_ (Inception [#inception-paper]_). The kernel
+protects itself by replacing every RET in the kernel with a branch to a
+single safe RET.
+
+----
+
+RSB underflow (Intel only)
+==========================
+
+RSB Alternate (RSBA) ("Intel Retbleed")
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Some Intel Skylake-generation CPUs are susceptible to the Intel variant
+of RETBleed [#retbleed-paper]_ (Return Stack Buffer Underflow
+[#intel-rsbu]_). If a RET is executed when the RSB buffer is empty due
+to mismatched CALLs/RETs or returning from a deep call stack, the branch
+predictor can fall back to using the Branch Target Buffer (BTB). If a
+user forces a BTB collision then the RET can speculatively branch to a
+user-controlled address.
+
+* Note that RSB filling doesn't fully mitigate this issue. If there
+ are enough unbalanced RETs, the RSB may still underflow and fall back
+ to using a poisoned BTB entry.
+
+* On context switch, user->user underflow attacks are mitigated by the
+ conditional IBPB [#cond-ibpb]_ on context switch which effectively
+ clears the BTB:
+
+ * "The indirect branch predictor barrier (IBPB) is an indirect branch
+ control mechanism that establishes a barrier, preventing software
+ that executed before the barrier from controlling the predicted
+ targets of indirect branches executed after the barrier on the same
+ logical processor." [#intel-ibpb-btb]_
+
+* On context switch and VMEXIT, user->kernel and guest->host RSB
+ underflows are mitigated by IBRS or eIBRS:
+
+ * "Enabling IBRS (including enhanced IBRS) will mitigate the "RSBU"
+ attack demonstrated by the researchers. As previously documented,
+ Intel recommends the use of enhanced IBRS, where supported. This
+ includes any processor that enumerates RRSBA but not RRSBA_DIS_S."
+ [#intel-rsbu]_
+
+ However, note that eIBRS and IBRS do not mitigate intra-mode attacks.
+ Like RRSBA below, this is mitigated by clearing the BHB on kernel
+ entry.
+
+ As an alternative to classic IBRS, call depth tracking (combined with
+ retpolines) can be used to track kernel returns and fill the RSB when
+ it gets close to being empty.
+
+Restricted RSB Alternate (RRSBA)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Some newer Intel CPUs have Restricted RSB Alternate (RRSBA) behavior,
+which, similar to RSBA described above, also falls back to using the BTB
+on RSB underflow. The only difference is that the predicted targets are
+restricted to the current domain when eIBRS is enabled:
+
+* "Restricted RSB Alternate (RRSBA) behavior allows alternate branch
+ predictors to be used by near RET instructions when the RSB is
+ empty. When eIBRS is enabled, the predicted targets of these
+ alternate predictors are restricted to those belonging to the
+ indirect branch predictor entries of the current prediction domain.
+ [#intel-eibrs-rrsba]_
+
+When a CPU with RRSBA is vulnerable to Branch History Injection
+[#bhi-paper]_ [#intel-bhi]_, an RSB underflow could be used for an
+intra-mode BTI attack. This is mitigated by clearing the BHB on
+kernel entry.
+
+However if the kernel uses retpolines instead of eIBRS, it needs to
+disable RRSBA:
+
+* "Where software is using retpoline as a mitigation for BHI or
+ intra-mode BTI, and the processor both enumerates RRSBA and
+ enumerates RRSBA_DIS controls, it should disable this behavior."
+ [#intel-retpoline-rrsba]_
+
+----
+
+References
+==========
+
+.. [#spectre-rsb] `Spectre Returns! Speculation Attacks using the Return Stack Buffer <https://arxiv.org/pdf/1807.07940.pdf>`_
+
+.. [#intel-rsb-filling] "Empty RSB Mitigation on Skylake-generation" in `Retpoline: A Branch Target Injection Mitigation <https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/technical-documentation/retpoline-branch-target-injection-mitigation.html#inpage-nav-5-1>`_
+
+.. [#amd-rsb-filling] "Mitigation V2-3" in `Software Techniques for Managing Speculation <https://www.amd.com/content/dam/amd/en/documents/processor-tech-docs/programmer-references/software-techniques-for-managing-speculation.pdf>`_
+
+.. [#cond-ibpb] Whether IBPB is written depends on whether the prev and/or next task is protected from Spectre attacks. It typically requires opting in per task or system-wide. For more details see the documentation for the ``spectre_v2_user`` cmdline option in Documentation/admin-guide/kernel-parameters.txt.
+
+.. [#amd-sbpb] IBPB without flushing of branch type predictions. Only exists for AMD.
+
+.. [#amd-ibpb-rsb] "Function 8000_0008h -- Processor Capacity Parameters and Extended Feature Identification" in `AMD64 Architecture Programmer's Manual Volume 3: General-Purpose and System Instructions <https://www.amd.com/content/dam/amd/en/documents/processor-tech-docs/programmer-references/24594.pdf>`_. SBPB behaves the same way according to `this email <https://lore.kernel.org/5175b163a3736ca5fd01cedf406735636c99a>`_.
+
+.. [#amd-ibpb-no-rsb] `Spectre Attacks: Exploiting Speculative Execution <https://comsec.ethz.ch/wp-content/files/ibpb_sp25.pdf>`_
+
+.. [#intel-ibpb-rsb] "Introduction" in `Post-barrier Return Stack Buffer Predictions / CVE-2022-26373 / INTEL-SA-00706 <https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/advisory-guidance/post-barrier-return-stack-buffer-predictions.html>`_
+
+.. [#amd-smep-rsb] "Existing Mitigations" in `Technical Guidance for Mitigating Branch Type Confusion <https://www.amd.com/content/dam/amd/en/documents/resources/technical-guidance-for-mitigating-branch-type-confusion.pdf>`_
+
+.. [#intel-smep-rsb] "Enhanced IBRS" in `Indirect Branch Restricted Speculation <https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/technical-documentation/indirect-branch-restricted-speculation.html>`_
+
+.. [#amd-eibrs-vmexit] "Extended Feature Enable Register (EFER)" in `AMD64 Architecture Programmer's Manual Volume 2: System Programming <https://www.amd.com/content/dam/amd/en/documents/processor-tech-docs/programmer-references/24593.pdf>`_
+
+.. [#intel-eibrs-vmexit] "Enhanced IBRS" in `Indirect Branch Restricted Speculation <https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/technical-documentation/indirect-branch-restricted-speculation.html>`_
+
+.. [#intel-pbrsb] `Post-barrier Return Stack Buffer Predictions / CVE-2022-26373 / INTEL-SA-00706 <https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/advisory-guidance/post-barrier-return-stack-buffer-predictions.html>`_
+
+.. [#retbleed-paper] `RETBleed: Arbitrary Speculative Code Execution with Return Instruction <https://comsec.ethz.ch/wp-content/files/retbleed_sec22.pdf>`_
+
+.. [#amd-btc] `Technical Guidance for Mitigating Branch Type Confusion <https://www.amd.com/content/dam/amd/en/documents/resources/technical-guidance-for-mitigating-branch-type-confusion.pdf>`_
+
+.. [#amd-srso] `Technical Update Regarding Speculative Return Stack Overflow <https://www.amd.com/content/dam/amd/en/documents/corporate/cr/speculative-return-stack-overflow-whitepaper.pdf>`_
+
+.. [#inception-paper] `Inception: Exposing New Attack Surfaces with Training in Transient Execution <https://comsec.ethz.ch/wp-content/files/inception_sec23.pdf>`_
+
+.. [#intel-rsbu] `Return Stack Buffer Underflow / Return Stack Buffer Underflow / CVE-2022-29901, CVE-2022-28693 / INTEL-SA-00702 <https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/advisory-guidance/return-stack-buffer-underflow.html>`_
+
+.. [#intel-ibpb-btb] `Indirect Branch Predictor Barrier' <https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/technical-documentation/indirect-branch-predictor-barrier.html>`_
+
+.. [#intel-eibrs-rrsba] "Guidance for RSBU" in `Return Stack Buffer Underflow / Return Stack Buffer Underflow / CVE-2022-29901, CVE-2022-28693 / INTEL-SA-00702 <https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/advisory-guidance/return-stack-buffer-underflow.html>`_
+
+.. [#bhi-paper] `Branch History Injection: On the Effectiveness of Hardware Mitigations Against Cross-Privilege Spectre-v2 Attacks <http://download.vusec.net/papers/bhi-spectre-bhb_sec22.pdf>`_
+
+.. [#intel-bhi] `Branch History Injection and Intra-mode Branch Target Injection / CVE-2022-0001, CVE-2022-0002 / INTEL-SA-00598 <https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/technical-documentation/branch-history-injection.html>`_
+
+.. [#intel-retpoline-rrsba] "Retpoline" in `Branch History Injection and Intra-mode Branch Target Injection / CVE-2022-0001, CVE-2022-0002 / INTEL-SA-00598 <https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/technical-documentation/branch-history-injection.html>`_
diff --git a/Documentation/admin-guide/hw-vuln/spectre.rst b/Documentation/admin-guide/hw-vuln/spectre.rst
index 32a8893e5617..4bb8549bee82 100644
--- a/Documentation/admin-guide/hw-vuln/spectre.rst
+++ b/Documentation/admin-guide/hw-vuln/spectre.rst
@@ -138,11 +138,10 @@ associated with the source address of the indirect branch. Specifically,
the BHB might be shared across privilege levels even in the presence of
Enhanced IBRS.
-Currently the only known real-world BHB attack vector is via
-unprivileged eBPF. Therefore, it's highly recommended to not enable
-unprivileged eBPF, especially when eIBRS is used (without retpolines).
-For a full mitigation against BHB attacks, it's recommended to use
-retpolines (or eIBRS combined with retpolines).
+Previously the only known real-world BHB attack vector was via unprivileged
+eBPF. Further research has found attacks that don't require unprivileged eBPF.
+For a full mitigation against BHB attacks it is recommended to set BHI_DIS_S or
+use the BHB clearing sequence.
Attack scenarios
----------------
@@ -407,7 +406,7 @@ The possible values in this file are:
- Single threaded indirect branch prediction (STIBP) status for protection
between different hyper threads. This feature can be controlled through
- prctl per process, or through kernel command line options. This is x86
+ prctl per process, or through kernel command line options. This is an x86
only feature. For more details see below.
==================== ========================================================
@@ -430,6 +429,23 @@ The possible values in this file are:
'PBRSB-eIBRS: Not affected' CPU is not affected by PBRSB
=========================== =======================================================
+ - Branch History Injection (BHI) protection status:
+
+.. list-table::
+
+ * - BHI: Not affected
+ - System is not affected
+ * - BHI: Retpoline
+ - System is protected by retpoline
+ * - BHI: BHI_DIS_S
+ - System is protected by BHI_DIS_S
+ * - BHI: SW loop, KVM SW loop
+ - System is protected by software clearing sequence
+ * - BHI: Vulnerable
+ - System is vulnerable to BHI
+ * - BHI: Vulnerable, KVM: SW loop
+ - System is vulnerable; KVM is protected by software clearing sequence
+
Full mitigation might require a microcode update from the CPU
vendor. When the necessary microcode is not available, the kernel will
report vulnerability.
@@ -473,8 +489,8 @@ Spectre variant 2
-mindirect-branch=thunk-extern -mindirect-branch-register options.
If the kernel is compiled with a Clang compiler, the compiler needs
to support -mretpoline-external-thunk option. The kernel config
- CONFIG_RETPOLINE needs to be turned on, and the CPU needs to run with
- the latest updated microcode.
+ CONFIG_MITIGATION_RETPOLINE needs to be turned on, and the CPU needs
+ to run with the latest updated microcode.
On Intel Skylake-era systems the mitigation covers most, but not all,
cases. See :ref:`[3] <spec_ref3>` for more details.
@@ -484,7 +500,11 @@ Spectre variant 2
Systems which support enhanced IBRS (eIBRS) enable IBRS protection once at
boot, by setting the IBRS bit, and they're automatically protected against
- Spectre v2 variant attacks.
+ some Spectre v2 variant attacks. The BHB can still influence the choice of
+ indirect branch predictor entry, and although branch predictor entries are
+ isolated between modes when eIBRS is enabled, the BHB itself is not isolated
+ between modes. Systems which support BHI_DIS_S will set it to protect against
+ BHI attacks.
On Intel's enhanced IBRS systems, this includes cross-thread branch target
injections on SMT systems (STIBP). In other words, Intel eIBRS enables
@@ -572,73 +592,19 @@ Spectre variant 2
Mitigation control on the kernel command line
---------------------------------------------
-Spectre variant 2 mitigation can be disabled or force enabled at the
-kernel command line.
-
- nospectre_v1
-
- [X86,PPC] Disable mitigations for Spectre Variant 1
- (bounds check bypass). With this option data leaks are
- possible in the system.
-
- nospectre_v2
-
- [X86] Disable all mitigations for the Spectre variant 2
- (indirect branch prediction) vulnerability. System may
- allow data leaks with this option, which is equivalent
- to spectre_v2=off.
-
-
- spectre_v2=
-
- [X86] Control mitigation of Spectre variant 2
- (indirect branch speculation) vulnerability.
- The default operation protects the kernel from
- user space attacks.
-
- on
- unconditionally enable, implies
- spectre_v2_user=on
- off
- unconditionally disable, implies
- spectre_v2_user=off
- auto
- kernel detects whether your CPU model is
- vulnerable
-
- Selecting 'on' will, and 'auto' may, choose a
- mitigation method at run time according to the
- CPU, the available microcode, the setting of the
- CONFIG_RETPOLINE configuration option, and the
- compiler with which the kernel was built.
-
- Selecting 'on' will also enable the mitigation
- against user space to user space task attacks.
-
- Selecting 'off' will disable both the kernel and
- the user space protections.
-
- Specific mitigations can also be selected manually:
-
- retpoline auto pick between generic,lfence
- retpoline,generic Retpolines
- retpoline,lfence LFENCE; indirect branch
- retpoline,amd alias for retpoline,lfence
- eibrs Enhanced/Auto IBRS
- eibrs,retpoline Enhanced/Auto IBRS + Retpolines
- eibrs,lfence Enhanced/Auto IBRS + LFENCE
- ibrs use IBRS to protect kernel
+In general the kernel selects reasonable default mitigations for the
+current CPU.
- Not specifying this option is equivalent to
- spectre_v2=auto.
+Spectre default mitigations can be disabled or changed at the kernel
+command line with the following options:
- In general the kernel by default selects
- reasonable mitigations for the current CPU. To
- disable Spectre variant 2 mitigations, boot with
- spectre_v2=off. Spectre variant 1 mitigations
- cannot be disabled.
+ - nospectre_v1
+ - nospectre_v2
+ - spectre_v2={option}
+ - spectre_v2_user={option}
+ - spectre_bhi={option}
-For spectre_v2_user see Documentation/admin-guide/kernel-parameters.txt
+For more details on the available options, refer to Documentation/admin-guide/kernel-parameters.txt
Mitigation selection guide
--------------------------
@@ -698,7 +664,7 @@ Intel white papers:
.. _spec_ref1:
-[1] `Intel analysis of speculative execution side channels <https://newsroom.intel.com/wp-content/uploads/sites/11/2018/01/Intel-Analysis-of-Speculative-Execution-Side-Channels.pdf>`_.
+[1] `Intel analysis of speculative execution side channels <https://www.intel.com/content/dam/www/public/us/en/documents/white-papers/analysis-of-speculative-execution-side-channels-white-paper.pdf>`_.
.. _spec_ref2:
@@ -716,7 +682,7 @@ AMD white papers:
.. _spec_ref5:
-[5] `AMD64 technology indirect branch control extension <https://developer.amd.com/wp-content/resources/Architecture_Guidelines_Update_Indirect_Branch_Control.pdf>`_.
+[5] `AMD64 technology indirect branch control extension <https://www.amd.com/content/dam/amd/en/documents/processor-tech-docs/white-papers/111006-architecture-guidelines-update-amd64-technology-indirect-branch-control-extension.pdf>`_.
.. _spec_ref6:
@@ -742,7 +708,7 @@ MIPS white paper:
.. _spec_ref10:
-[10] `MIPS: response on speculative execution and side channel vulnerabilities <https://www.mips.com/blog/mips-response-on-speculative-execution-and-side-channel-vulnerabilities/>`_.
+[10] `MIPS: response on speculative execution and side channel vulnerabilities <https://web.archive.org/web/20220512003005if_/https://www.mips.com/blog/mips-response-on-speculative-execution-and-side-channel-vulnerabilities/>`_.
Academic papers:
diff --git a/Documentation/admin-guide/hw-vuln/srso.rst b/Documentation/admin-guide/hw-vuln/srso.rst
index e715bfc09879..66af95251a3d 100644
--- a/Documentation/admin-guide/hw-vuln/srso.rst
+++ b/Documentation/admin-guide/hw-vuln/srso.rst
@@ -104,7 +104,20 @@ The possible values in this file are:
(spec_rstack_overflow=ibpb-vmexit)
+ * 'Mitigation: Reduced Speculation':
+ This mitigation gets automatically enabled when the above one "IBPB on
+ VMEXIT" has been selected and the CPU supports the BpSpecReduce bit.
+
+ It gets automatically enabled on machines which have the
+ SRSO_USER_KERNEL_NO=1 CPUID bit. In that case, the code logic is to switch
+ to the above =ibpb-vmexit mitigation because the user/kernel boundary is
+ not affected anymore and thus "safe RET" is not needed.
+
+ After enabling the IBPB on VMEXIT mitigation option, the BpSpecReduce bit
+ is detected (functionality present on all such machines) and that
+ practically overrides IBPB on VMEXIT as it has a lot less performance
+ impact and takes care of the guest->host attack vector too.
In order to exploit vulnerability, an attacker needs to:
@@ -135,7 +148,7 @@ and does not want to suffer the performance impact, one can always
disable the mitigation with spec_rstack_overflow=off.
Similarly, 'Mitigation: IBPB' is another full mitigation type employing
-an indrect branch prediction barrier after having applied the required
+an indirect branch prediction barrier after having applied the required
microcode patch for one's system. This mitigation comes also at
a performance cost.
@@ -158,3 +171,72 @@ poisoned BTB entry and using that safe one for all function returns.
In older Zen1 and Zen2, this is accomplished using a reinterpretation
technique similar to Retbleed one: srso_untrain_ret() and
srso_safe_ret().
+
+Checking the safe RET mitigation actually works
+-----------------------------------------------
+
+In case one wants to validate whether the SRSO safe RET mitigation works
+on a kernel, one could use two performance counters
+
+* PMC_0xc8 - Count of RET/RET lw retired
+* PMC_0xc9 - Count of RET/RET lw retired mispredicted
+
+and compare the number of RETs retired properly vs those retired
+mispredicted, in kernel mode. Another way of specifying those events
+is::
+
+ # perf list ex_ret_near_ret
+
+ List of pre-defined events (to be used in -e or -M):
+
+ core:
+ ex_ret_near_ret
+ [Retired Near Returns]
+ ex_ret_near_ret_mispred
+ [Retired Near Returns Mispredicted]
+
+Either the command using the event mnemonics::
+
+ # perf stat -e ex_ret_near_ret:k -e ex_ret_near_ret_mispred:k sleep 10s
+
+or using the raw PMC numbers::
+
+ # perf stat -e cpu/event=0xc8,umask=0/k -e cpu/event=0xc9,umask=0/k sleep 10s
+
+should give the same amount. I.e., every RET retired should be
+mispredicted::
+
+ [root@brent: ~/kernel/linux/tools/perf> ./perf stat -e cpu/event=0xc8,umask=0/k -e cpu/event=0xc9,umask=0/k sleep 10s
+
+ Performance counter stats for 'sleep 10s':
+
+ 137,167 cpu/event=0xc8,umask=0/k
+ 137,173 cpu/event=0xc9,umask=0/k
+
+ 10.004110303 seconds time elapsed
+
+ 0.000000000 seconds user
+ 0.004462000 seconds sys
+
+vs the case when the mitigation is disabled (spec_rstack_overflow=off)
+or not functioning properly, showing usually a lot smaller number of
+mispredicted retired RETs vs the overall count of retired RETs during
+a workload::
+
+ [root@brent: ~/kernel/linux/tools/perf> ./perf stat -e cpu/event=0xc8,umask=0/k -e cpu/event=0xc9,umask=0/k sleep 10s
+
+ Performance counter stats for 'sleep 10s':
+
+ 201,627 cpu/event=0xc8,umask=0/k
+ 4,074 cpu/event=0xc9,umask=0/k
+
+ 10.003267252 seconds time elapsed
+
+ 0.002729000 seconds user
+ 0.000000000 seconds sys
+
+Also, there is a selftest which performs the above, go to
+tools/testing/selftests/x86/ and do::
+
+ make srso
+ ./srso
diff --git a/Documentation/admin-guide/hw-vuln/vmscape.rst b/Documentation/admin-guide/hw-vuln/vmscape.rst
new file mode 100644
index 000000000000..d9b9a2b6c114
--- /dev/null
+++ b/Documentation/admin-guide/hw-vuln/vmscape.rst
@@ -0,0 +1,110 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+VMSCAPE
+=======
+
+VMSCAPE is a vulnerability that may allow a guest to influence the branch
+prediction in host userspace. It particularly affects hypervisors like QEMU.
+
+Even if a hypervisor may not have any sensitive data like disk encryption keys,
+guest-userspace may be able to attack the guest-kernel using the hypervisor as
+a confused deputy.
+
+Affected processors
+-------------------
+
+The following CPU families are affected by VMSCAPE:
+
+**Intel processors:**
+ - Skylake generation (Parts without Enhanced-IBRS)
+ - Cascade Lake generation - (Parts affected by ITS guest/host separation)
+ - Alder Lake and newer (Parts affected by BHI)
+
+Note that, BHI affected parts that use BHB clearing software mitigation e.g.
+Icelake are not vulnerable to VMSCAPE.
+
+**AMD processors:**
+ - Zen series (families 0x17, 0x19, 0x1a)
+
+** Hygon processors:**
+ - Family 0x18
+
+Mitigation
+----------
+
+Conditional IBPB
+----------------
+
+Kernel tracks when a CPU has run a potentially malicious guest and issues an
+IBPB before the first exit to userspace after VM-exit. If userspace did not run
+between VM-exit and the next VM-entry, no IBPB is issued.
+
+Note that the existing userspace mitigation against Spectre-v2 is effective in
+protecting the userspace. They are insufficient to protect the userspace VMMs
+from a malicious guest. This is because Spectre-v2 mitigations are applied at
+context switch time, while the userspace VMM can run after a VM-exit without a
+context switch.
+
+Vulnerability enumeration and mitigation is not applied inside a guest. This is
+because nested hypervisors should already be deploying IBPB to isolate
+themselves from nested guests.
+
+SMT considerations
+------------------
+
+When Simultaneous Multi-Threading (SMT) is enabled, hypervisors can be
+vulnerable to cross-thread attacks. For complete protection against VMSCAPE
+attacks in SMT environments, STIBP should be enabled.
+
+The kernel will issue a warning if SMT is enabled without adequate STIBP
+protection. Warning is not issued when:
+
+- SMT is disabled
+- STIBP is enabled system-wide
+- Intel eIBRS is enabled (which implies STIBP protection)
+
+System information and options
+------------------------------
+
+The sysfs file showing VMSCAPE mitigation status is:
+
+ /sys/devices/system/cpu/vulnerabilities/vmscape
+
+The possible values in this file are:
+
+ * 'Not affected':
+
+ The processor is not vulnerable to VMSCAPE attacks.
+
+ * 'Vulnerable':
+
+ The processor is vulnerable and no mitigation has been applied.
+
+ * 'Mitigation: IBPB before exit to userspace':
+
+ Conditional IBPB mitigation is enabled. The kernel tracks when a CPU has
+ run a potentially malicious guest and issues an IBPB before the first
+ exit to userspace after VM-exit.
+
+ * 'Mitigation: IBPB on VMEXIT':
+
+ IBPB is issued on every VM-exit. This occurs when other mitigations like
+ RETBLEED or SRSO are already issuing IBPB on VM-exit.
+
+Mitigation control on the kernel command line
+----------------------------------------------
+
+The mitigation can be controlled via the ``vmscape=`` command line parameter:
+
+ * ``vmscape=off``:
+
+ Disable the VMSCAPE mitigation.
+
+ * ``vmscape=ibpb``:
+
+ Enable conditional IBPB mitigation (default when CONFIG_MITIGATION_VMSCAPE=y).
+
+ * ``vmscape=force``:
+
+ Force vulnerability detection and mitigation even on processors that are
+ not known to be affected.
diff --git a/Documentation/admin-guide/index.rst b/Documentation/admin-guide/index.rst
index fb40a1f6f79e..259d79fbeb94 100644
--- a/Documentation/admin-guide/index.rst
+++ b/Documentation/admin-guide/index.rst
@@ -1,3 +1,4 @@
+=================================================
The Linux kernel user's and administrator's guide
=================================================
@@ -6,6 +7,9 @@ added to the kernel over time. There is, as yet, little overall order or
organization here — this material was not written to be a single, coherent
document! With luck things will improve quickly over time.
+General guides to kernel administration
+---------------------------------------
+
This initial section contains overall information, including the README
file describing the kernel as a whole, documentation on kernel parameters,
etc.
@@ -14,19 +18,44 @@ etc.
:maxdepth: 1
README
- kernel-parameters
devices
- sysctl/index
- abi
features
-This section describes CPU vulnerabilities and their mitigations.
+A big part of the kernel's administrative interface is the /proc and sysfs
+virtual filesystems; these documents describe how to interact with tem
+
+.. toctree::
+ :maxdepth: 1
+
+ sysfs-rules
+ sysctl/index
+ cputopology
+ abi
+
+Security-related documentation:
.. toctree::
:maxdepth: 1
hw-vuln/index
+ LSM/index
+ perf-security
+
+Booting the kernel
+------------------
+
+.. toctree::
+ :maxdepth: 1
+
+ bootconfig
+ kernel-parameters
+ efi-stub
+ initrd
+
+
+Tracking down and identifying problems
+--------------------------------------
Here is a set of documents aimed at users who are trying to track down
problems and bugs in particular.
@@ -37,6 +66,7 @@ problems and bugs in particular.
reporting-issues
reporting-regressions
quickly-build-trimmed-linux
+ verify-bugs-and-bisect-regressions
bug-hunting
bug-bisect
tainted-kernels
@@ -46,95 +76,119 @@ problems and bugs in particular.
kdump/index
perf/index
pstore-blk
+ clearing-warn-once
+ kernel-per-CPU-kthreads
+ lockup-watchdogs
+ RAS/index
+ sysrq
-This is the beginning of a section with information of interest to
-application developers. Documents covering various aspects of the kernel
-ABI will be found here.
+
+Core-kernel subsystems
+----------------------
+
+These documents describe core-kernel administration interfaces that are
+likely to be of interest on almost any system.
.. toctree::
:maxdepth: 1
- sysfs-rules
+ cgroup-v2
+ cgroup-v1/index
+ cpu-load
+ mm/index
+ module-signing
+ namespaces/index
+ numastat
+ pm/index
+ syscall-user-dispatch
-This is the beginning of a section with information of interest to
-application developers and system integrators doing analysis of the
-Linux kernel for safety critical applications. Documents supporting
-analysis of kernel interactions with applications, and key kernel
-subsystems expectations will be found here.
+Support for non-native binary formats. Note that some of these
+documents are ... old ...
.. toctree::
:maxdepth: 1
- workload-tracing
+ binfmt-misc
+ java
+ mono
+
-The rest of this manual consists of various unordered guides on how to
-configure specific aspects of kernel behavior to your liking.
+Block-layer and filesystem administration
+-----------------------------------------
.. toctree::
:maxdepth: 1
- acpi/index
- aoe/index
- auxdisplay/index
bcache
binderfs
- binfmt-misc
blockdev/index
- bootconfig
- braille-console
- btmrvl
- cgroup-v1/index
- cgroup-v2
cifs/index
- clearing-warn-once
- cpu-load
- cputopology
- dell_rbu
device-mapper/index
- edid
- efi-stub
ext4
filesystem-monitoring
nfs/index
- gpio/index
- highuid
- hw_random
- initrd
iostats
- java
jfs
- kernel-per-CPU-kthreads
+ md
+ ufs
+ xfs
+
+Device-specific guides
+----------------------
+
+How to configure your hardware within your Linux system.
+
+.. toctree::
+ :maxdepth: 1
+
+ acpi/index
+ aoe/index
+ auxdisplay/index
+ braille-console
+ btmrvl
+ dell_rbu
+ edid
+ gpio/index
+ hw_random
laptops/index
lcd-panel-cgram
- ldm
- lockup-watchdogs
- LSM/index
- md
media/index
- mm/index
- module-signing
- mono
- namespaces/index
- numastat
+ nvme-multipath
parport
- perf-security
- pm/index
- pmf
pnp
rapidio
- ras
rtc
serial-console
svga
- syscall-user-dispatch
- sysrq
thermal/index
thunderbolt
- ufs
- unicode
vga-softcursor
video-output
- xfs
+
+Workload analysis
+-----------------
+
+This is the beginning of a section with information of interest to
+application developers and system integrators doing analysis of the
+Linux kernel for safety critical applications. Documents supporting
+analysis of kernel interactions with applications, and key kernel
+subsystems expectations will be found here.
+
+.. toctree::
+ :maxdepth: 1
+
+ workload-tracing
+
+Everything else
+---------------
+
+A few hard-to-categorize and generally obsolete documents.
+
+.. toctree::
+ :maxdepth: 1
+
+ ldm
+ unicode
.. only:: subproject and html
diff --git a/Documentation/admin-guide/iostats.rst b/Documentation/admin-guide/iostats.rst
index 609a3201fd4e..9453196ade51 100644
--- a/Documentation/admin-guide/iostats.rst
+++ b/Documentation/admin-guide/iostats.rst
@@ -2,62 +2,39 @@
I/O statistics fields
=====================
-Since 2.4.20 (and some versions before, with patches), and 2.5.45,
-more extensive disk statistics have been introduced to help measure disk
-activity. Tools such as ``sar`` and ``iostat`` typically interpret these and do
-the work for you, but in case you are interested in creating your own
-tools, the fields are explained here.
-
-In 2.4 now, the information is found as additional fields in
-``/proc/partitions``. In 2.6 and upper, the same information is found in two
-places: one is in the file ``/proc/diskstats``, and the other is within
-the sysfs file system, which must be mounted in order to obtain
-the information. Throughout this document we'll assume that sysfs
-is mounted on ``/sys``, although of course it may be mounted anywhere.
-Both ``/proc/diskstats`` and sysfs use the same source for the information
-and so should not differ.
-
-Here are examples of these different formats::
-
- 2.4:
- 3 0 39082680 hda 446216 784926 9550688 4382310 424847 312726 5922052 19310380 0 3376340 23705160
- 3 1 9221278 hda1 35486 0 35496 38030 0 0 0 0 0 38030 38030
-
- 2.6+ sysfs:
- 446216 784926 9550688 4382310 424847 312726 5922052 19310380 0 3376340 23705160
- 35486 38030 38030 38030
-
- 2.6+ diskstats:
- 3 0 hda 446216 784926 9550688 4382310 424847 312726 5922052 19310380 0 3376340 23705160
- 3 1 hda1 35486 38030 38030 38030
-
- 4.18+ diskstats:
- 3 0 hda 446216 784926 9550688 4382310 424847 312726 5922052 19310380 0 3376340 23705160 0 0 0 0
-
-On 2.4 you might execute ``grep 'hda ' /proc/partitions``. On 2.6+, you have
-a choice of ``cat /sys/block/hda/stat`` or ``grep 'hda ' /proc/diskstats``.
-
-The advantage of one over the other is that the sysfs choice works well
-if you are watching a known, small set of disks. ``/proc/diskstats`` may
-be a better choice if you are watching a large number of disks because
-you'll avoid the overhead of 50, 100, or 500 or more opens/closes with
-each snapshot of your disk statistics.
-
-In 2.4, the statistics fields are those after the device name. In
-the above example, the first field of statistics would be 446216.
-By contrast, in 2.6+ if you look at ``/sys/block/hda/stat``, you'll
-find just the 15 fields, beginning with 446216. If you look at
-``/proc/diskstats``, the 15 fields will be preceded by the major and
-minor device numbers, and device name. Each of these formats provides
-15 fields of statistics, each meaning exactly the same things.
-All fields except field 9 are cumulative since boot. Field 9 should
-go to zero as I/Os complete; all others only increase (unless they
-overflow and wrap). Wrapping might eventually occur on a very busy
-or long-lived system; so applications should be prepared to deal with
-it. Regarding wrapping, the types of the fields are either unsigned
-int (32 bit) or unsigned long (32-bit or 64-bit, depending on your
-machine) as noted per-field below. Unless your observations are very
-spread in time, these fields should not wrap twice before you notice it.
+The kernel exposes disk statistics via ``/proc/diskstats`` and
+``/sys/block/<device>/stat``. These stats are usually accessed via tools
+such as ``sar`` and ``iostat``.
+
+Here are examples using a disk with two partitions::
+
+ /proc/diskstats:
+ 259 0 nvme0n1 255999 814 12369153 47919 996852 81 36123024 425995 0 301795 580470 0 0 0 0 60602 106555
+ 259 1 nvme0n1p1 492 813 17572 96 848 81 108288 210 0 76 307 0 0 0 0 0 0
+ 259 2 nvme0n1p2 255401 1 12343477 47799 996004 0 36014736 425784 0 344336 473584 0 0 0 0 0 0
+
+ /sys/block/nvme0n1/stat:
+ 255999 814 12369153 47919 996858 81 36123056 426009 0 301809 580491 0 0 0 0 60605 106562
+
+ /sys/block/nvme0n1/nvme0n1p1/stat:
+ 492 813 17572 96 848 81 108288 210 0 76 307 0 0 0 0 0 0
+
+Both files contain the same 17 statistics. ``/sys/block/<device>/stat``
+contains the fields for ``<device>``. In ``/proc/diskstats`` the fields
+are prefixed with the major and minor device numbers and the device
+name. In the example above, the first stat value for ``nvme0n1`` is
+255999 in both files.
+
+The sysfs ``stat`` file is efficient for monitoring a small, known set
+of disks. If you're tracking a large number of devices,
+``/proc/diskstats`` is often the better choice since it avoids the
+overhead of opening and closing multiple files for each snapshot.
+
+All fields are cumulative, monotonic counters, except for field 9, which
+resets to zero as I/Os complete. The remaining fields reset at boot, on
+device reattachment or reinitialization, or when the underlying counter
+overflows. Applications reading these counters should detect and handle
+resets when comparing stat snapshots.
Each set of stats only applies to the indicated device; if you want
system-wide stats you'll have to find all the devices and sum them all up.
diff --git a/Documentation/admin-guide/kdump/kdump.rst b/Documentation/admin-guide/kdump/kdump.rst
index 5762e7477a0c..7b011eb116a7 100644
--- a/Documentation/admin-guide/kdump/kdump.rst
+++ b/Documentation/admin-guide/kdump/kdump.rst
@@ -136,10 +136,6 @@ System kernel config options
CONFIG_KEXEC_CORE=y
- Subsequently, CRASH_CORE is selected by KEXEC_CORE::
-
- CONFIG_CRASH_CORE=y
-
2) Enable "sysfs file system support" in "Filesystem" -> "Pseudo
filesystems." This is usually enabled by default::
@@ -168,6 +164,10 @@ Dump-capture kernel config options (Arch Independent)
CONFIG_CRASH_DUMP=y
+ And this will select VMCORE_INFO and CRASH_RESERVE::
+ CONFIG_VMCORE_INFO=y
+ CONFIG_CRASH_RESERVE=y
+
2) Enable "/proc/vmcore support" under "Filesystems" -> "Pseudo filesystems"::
CONFIG_PROC_VMCORE=y
@@ -180,10 +180,6 @@ Dump-capture kernel config options (Arch Dependent, i386 and x86_64)
1) On i386, enable high memory support under "Processor type and
features"::
- CONFIG_HIGHMEM64G=y
-
- or::
-
CONFIG_HIGHMEM4G
2) With CONFIG_SMP=y, usually nr_cpus=1 need specified on the kernel
@@ -191,9 +187,7 @@ Dump-capture kernel config options (Arch Dependent, i386 and x86_64)
CPU is enough for kdump kernel to dump vmcore on most of systems.
However, you can also specify nr_cpus=X to enable multiple processors
- in kdump kernel. In this case, "disable_cpu_apicid=" is needed to
- tell kdump kernel which cpu is 1st kernel's BSP. Please refer to
- admin-guide/kernel-parameters.txt for more details.
+ in kdump kernel.
With CONFIG_SMP=n, the above things are not related.
@@ -317,6 +311,27 @@ crashkernel syntax
crashkernel=0,low
+4) crashkernel=size,cma
+
+ Reserve additional crash kernel memory from CMA. This reservation is
+ usable by the first system's userspace memory and kernel movable
+ allocations (memory balloon, zswap). Pages allocated from this memory
+ range will not be included in the vmcore so this should not be used if
+ dumping of userspace memory is intended and it has to be expected that
+ some movable kernel pages may be missing from the dump.
+
+ A standard crashkernel reservation, as described above, is still needed
+ to hold the crash kernel and initrd.
+
+ This option increases the risk of a kdump failure: DMA transfers
+ configured by the first kernel may end up corrupting the second
+ kernel's memory.
+
+ This reservation method is intended for systems that can't afford to
+ sacrifice enough memory for standard crashkernel reservation and where
+ less reliable and possibly incomplete kdump is preferable to no kdump at
+ all.
+
Boot into System Kernel
-----------------------
1) Update the boot loader (such as grub, yaboot, or lilo) configuration
@@ -454,10 +469,9 @@ Notes on loading the dump-capture kernel:
to use multi-thread programs with it, such as parallel dump feature of
makedumpfile. Otherwise, the multi-thread program may have a great
performance degradation. To enable multi-cpu support, you should bring up an
- SMP dump-capture kernel and specify maxcpus/nr_cpus, disable_cpu_apicid=[X]
- options while loading it.
+ SMP dump-capture kernel and specify maxcpus/nr_cpus options while loading it.
-* For s390x there are two kdump modes: If a ELF header is specified with
+* For s390x there are two kdump modes: If an ELF header is specified with
the elfcorehdr= kernel parameter, it is used by the kdump kernel as it
is done on all other architectures. If no elfcorehdr= kernel parameter is
specified, the s390x kdump kernel dynamically creates the header. The
@@ -554,6 +568,38 @@ from within add_taint() whenever the value set in this bitmask matches with the
bit flag being set by add_taint().
This will cause a kdump to occur at the add_taint()->panic() call.
+Write the dump file to encrypted disk volume
+============================================
+
+CONFIG_CRASH_DM_CRYPT can be enabled to support saving the dump file to an
+encrypted disk volume (only x86_64 supported for now). User space can interact
+with /sys/kernel/config/crash_dm_crypt_keys for setup,
+
+1. Tell the first kernel what logon keys are needed to unlock the disk volumes,
+ # Add key #1
+ mkdir /sys/kernel/config/crash_dm_crypt_keys/7d26b7b4-e342-4d2d-b660-7426b0996720
+ # Add key #1's description
+ echo cryptsetup:7d26b7b4-e342-4d2d-b660-7426b0996720 > /sys/kernel/config/crash_dm_crypt_keys/description
+
+ # how many keys do we have now?
+ cat /sys/kernel/config/crash_dm_crypt_keys/count
+ 1
+
+ # Add key #2 in the same way
+
+ # how many keys do we have now?
+ cat /sys/kernel/config/crash_dm_crypt_keys/count
+ 2
+
+ # To support CPU/memory hot-plugging, re-use keys already saved to reserved
+ # memory
+ echo true > /sys/kernel/config/crash_dm_crypt_key/reuse
+
+2. Load the dump-capture kernel
+
+3. After the dump-capture kerne get booted, restore the keys to user keyring
+ echo yes > /sys/kernel/crash_dm_crypt_keys/restore
+
Contact
=======
diff --git a/Documentation/admin-guide/kdump/vmcoreinfo.rst b/Documentation/admin-guide/kdump/vmcoreinfo.rst
index bced9e4b6e08..404a15f6782c 100644
--- a/Documentation/admin-guide/kdump/vmcoreinfo.rst
+++ b/Documentation/admin-guide/kdump/vmcoreinfo.rst
@@ -65,11 +65,11 @@ Defines the beginning of the text section. In general, _stext indicates
the kernel start address. Used to convert a virtual address from the
direct kernel map to a physical address.
-vmap_area_list
---------------
+VMALLOC_START
+-------------
-Stores the virtual area list. makedumpfile gets the vmalloc start value
-from this variable and its value is necessary for vmalloc translation.
+Stores the base address of vmalloc area. makedumpfile gets this value
+since is necessary for vmalloc translation.
mem_map
-------
@@ -325,14 +325,14 @@ NR_FREE_PAGES
On linux-2.6.21 or later, the number of free pages is in
vm_stat[NR_FREE_PAGES]. Used to get the number of free pages.
-PG_lru|PG_private|PG_swapcache|PG_swapbacked|PG_slab|PG_hwpoision|PG_head_mask|PG_hugetlb
------------------------------------------------------------------------------------------
+PG_lru|PG_private|PG_swapcache|PG_swapbacked|PG_hwpoison|PG_head_mask
+--------------------------------------------------------------------------
Page attributes. These flags are used to filter various unnecessary for
dumping pages.
-PAGE_BUDDY_MAPCOUNT_VALUE(~PG_buddy)|PAGE_OFFLINE_MAPCOUNT_VALUE(~PG_offline)
------------------------------------------------------------------------------
+PAGE_SLAB_MAPCOUNT_VALUE|PAGE_BUDDY_MAPCOUNT_VALUE|PAGE_OFFLINE_MAPCOUNT_VALUE|PAGE_HUGETLB_MAPCOUNT_VALUE|PAGE_UNACCEPTED_MAPCOUNT_VALUE
+------------------------------------------------------------------------------------------------------------------------------------------
More page attributes. These flags are used to filter various unnecessary for
dumping pages.
diff --git a/Documentation/admin-guide/kernel-parameters.rst b/Documentation/admin-guide/kernel-parameters.rst
index 4410384596a9..02a725536cc5 100644
--- a/Documentation/admin-guide/kernel-parameters.rst
+++ b/Documentation/admin-guide/kernel-parameters.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
.. _kernelparameters:
The kernel's command-line parameters
@@ -27,6 +29,16 @@ kernel command line (/proc/cmdline) and collects module parameters
when it loads a module, so the kernel command line can be used for
loadable modules too.
+This document may not be entirely up to date and comprehensive. The command
+"modinfo -p ${modulename}" shows a current list of all parameters of a loadable
+module. Loadable modules, after being loaded into the running kernel, also
+reveal their parameters in /sys/module/${modulename}/parameters/. Some of these
+parameters may be changed at runtime by the command
+``echo -n ${value} > /sys/module/${modulename}/parameters/${parm}``.
+
+Special handling
+----------------
+
Hyphens (dashes) and underscores are equivalent in parameter names, so::
log_buf_len=1M print-fatal-signals=1
@@ -39,8 +51,8 @@ Double-quotes can be used to protect spaces in values, e.g.::
param="spaces in here"
-cpu lists:
-----------
+cpu lists
+~~~~~~~~~
Some kernel parameters take a list of CPUs as a value, e.g. isolcpus,
nohz_full, irqaffinity, rcu_nocbs. The format of this list is:
@@ -82,114 +94,23 @@ so that "nohz_full=all" is the equivalent of "nohz_full=0-N".
The semantics of "N" and "all" is supported on a level of bitmaps and holds for
all users of bitmap_parselist().
-This document may not be entirely up to date and comprehensive. The command
-"modinfo -p ${modulename}" shows a current list of all parameters of a loadable
-module. Loadable modules, after being loaded into the running kernel, also
-reveal their parameters in /sys/module/${modulename}/parameters/. Some of these
-parameters may be changed at runtime by the command
-``echo -n ${value} > /sys/module/${modulename}/parameters/${parm}``.
+Metric suffixes
+~~~~~~~~~~~~~~~
+
+The [KMG] suffix is commonly described after a number of kernel
+parameter values. 'K', 'M', 'G', 'T', 'P', and 'E' suffixes are allowed.
+These letters represent the _binary_ multipliers 'Kilo', 'Mega', 'Giga',
+'Tera', 'Peta', and 'Exa', equaling 2^10, 2^20, 2^30, 2^40, 2^50, and
+2^60 bytes respectively. Such letter suffixes can also be entirely omitted.
+
+Kernel Build Options
+--------------------
The parameters listed below are only valid if certain kernel build options
were enabled and if respective hardware is present. This list should be kept
in alphabetical order. The text in square brackets at the beginning
of each description states the restrictions within which a parameter
-is applicable::
-
- ACPI ACPI support is enabled.
- AGP AGP (Accelerated Graphics Port) is enabled.
- ALSA ALSA sound support is enabled.
- APIC APIC support is enabled.
- APM Advanced Power Management support is enabled.
- APPARMOR AppArmor support is enabled.
- ARM ARM architecture is enabled.
- ARM64 ARM64 architecture is enabled.
- AX25 Appropriate AX.25 support is enabled.
- CLK Common clock infrastructure is enabled.
- CMA Contiguous Memory Area support is enabled.
- DRM Direct Rendering Management support is enabled.
- DYNAMIC_DEBUG Build in debug messages and enable them at runtime
- EDD BIOS Enhanced Disk Drive Services (EDD) is enabled
- EFI EFI Partitioning (GPT) is enabled
- EVM Extended Verification Module
- FB The frame buffer device is enabled.
- FTRACE Function tracing enabled.
- GCOV GCOV profiling is enabled.
- HIBERNATION HIBERNATION is enabled.
- HW Appropriate hardware is enabled.
- HYPER_V HYPERV support is enabled.
- IA-64 IA-64 architecture is enabled.
- IMA Integrity measurement architecture is enabled.
- IP_PNP IP DHCP, BOOTP, or RARP is enabled.
- IPV6 IPv6 support is enabled.
- ISAPNP ISA PnP code is enabled.
- ISDN Appropriate ISDN support is enabled.
- ISOL CPU Isolation is enabled.
- JOY Appropriate joystick support is enabled.
- KGDB Kernel debugger support is enabled.
- KVM Kernel Virtual Machine support is enabled.
- LIBATA Libata driver is enabled
- LOONGARCH LoongArch architecture is enabled.
- LOOP Loopback device support is enabled.
- LP Printer support is enabled.
- M68k M68k architecture is enabled.
- These options have more detailed description inside of
- Documentation/arch/m68k/kernel-options.rst.
- MDA MDA console support is enabled.
- MIPS MIPS architecture is enabled.
- MOUSE Appropriate mouse support is enabled.
- MSI Message Signaled Interrupts (PCI).
- MTD MTD (Memory Technology Device) support is enabled.
- NET Appropriate network support is enabled.
- NFS Appropriate NFS support is enabled.
- NUMA NUMA support is enabled.
- OF Devicetree is enabled.
- PARISC The PA-RISC architecture is enabled.
- PCI PCI bus support is enabled.
- PCIE PCI Express support is enabled.
- PCMCIA The PCMCIA subsystem is enabled.
- PNP Plug & Play support is enabled.
- PPC PowerPC architecture is enabled.
- PPT Parallel port support is enabled.
- PS2 Appropriate PS/2 support is enabled.
- PV_OPS A paravirtualized kernel is enabled.
- RAM RAM disk support is enabled.
- RDT Intel Resource Director Technology.
- RISCV RISCV architecture is enabled.
- S390 S390 architecture is enabled.
- SCSI Appropriate SCSI support is enabled.
- A lot of drivers have their options described inside
- the Documentation/scsi/ sub-directory.
- SECURITY Different security models are enabled.
- SELINUX SELinux support is enabled.
- SERIAL Serial support is enabled.
- SH SuperH architecture is enabled.
- SMP The kernel is an SMP kernel.
- SPARC Sparc architecture is enabled.
- SUSPEND System suspend states are enabled.
- SWSUSP Software suspend (hibernation) is enabled.
- TPM TPM drivers are enabled.
- UMS USB Mass Storage support is enabled.
- USB USB support is enabled.
- USBHID USB Human Interface Device support is enabled.
- V4L Video For Linux support is enabled.
- VGA The VGA console has been enabled.
- VMMIO Driver for memory mapped virtio devices is enabled.
- VT Virtual terminal support is enabled.
- WDT Watchdog support is enabled.
- X86-32 X86-32, aka i386 architecture is enabled.
- X86-64 X86-64 architecture is enabled.
- More X86-64 boot options can be found in
- Documentation/arch/x86/x86_64/boot-options.rst.
- X86 Either 32-bit or 64-bit x86 (same as X86-32+X86-64)
- X86_UV SGI UV support is enabled.
- XEN Xen support is enabled
- XTENSA xtensa architecture is enabled.
-
-In addition, the following text indicates that the option::
-
- BOOT Is a boot loader parameter.
- BUGS= Relates to possible processor bugs on the said processor.
- KNL Is a kernel start-up parameter.
+is applicable.
Parameters denoted with BOOT are actually interpreted by the boot
loader, and have no meaning to the kernel directly.
@@ -197,10 +118,9 @@ Do not modify the syntax of boot loader parameters without extreme
need or coordination with <Documentation/arch/x86/boot.rst>.
There are also arch-specific kernel-parameters not documented here.
-See for example <Documentation/arch/x86/x86_64/boot-options.rst>.
Note that ALL kernel parameters listed below are CASE SENSITIVE, and that
-a trailing = on the name of any parameter states that that parameter will
+a trailing = on the name of any parameter states that the parameter will
be entered as an environment variable, whereas its absence indicates that
it will appear as a kernel argument readable via /proc/cmdline by programs
running once the system is up.
@@ -211,10 +131,5 @@ a fixed number of characters. This limit depends on the architecture
and is between 256 and 4096 characters. It is defined in the file
./include/uapi/asm-generic/setup.h as COMMAND_LINE_SIZE.
-Finally, the [KMG] suffix is commonly described after a number of kernel
-parameter values. These 'K', 'M', and 'G' letters represent the _binary_
-multipliers 'Kilo', 'Mega', and 'Giga', equaling 2^10, 2^20, and 2^30
-bytes respectively. Such letter suffixes can also be entirely omitted:
-
.. include:: kernel-parameters.txt
:literal:
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 31b3a25680d0..a8d0afde7f85 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1,3 +1,101 @@
+ ACPI ACPI support is enabled.
+ AGP AGP (Accelerated Graphics Port) is enabled.
+ ALSA ALSA sound support is enabled.
+ APIC APIC support is enabled.
+ APM Advanced Power Management support is enabled.
+ APPARMOR AppArmor support is enabled.
+ ARM ARM architecture is enabled.
+ ARM64 ARM64 architecture is enabled.
+ AX25 Appropriate AX.25 support is enabled.
+ CLK Common clock infrastructure is enabled.
+ CMA Contiguous Memory Area support is enabled.
+ DRM Direct Rendering Management support is enabled.
+ DYNAMIC_DEBUG Build in debug messages and enable them at runtime
+ EARLY Parameter processed too early to be embedded in initrd.
+ EDD BIOS Enhanced Disk Drive Services (EDD) is enabled
+ EFI EFI Partitioning (GPT) is enabled
+ EVM Extended Verification Module
+ FB The frame buffer device is enabled.
+ FTRACE Function tracing enabled.
+ GCOV GCOV profiling is enabled.
+ HIBERNATION HIBERNATION is enabled.
+ HW Appropriate hardware is enabled.
+ HYPER_V HYPERV support is enabled.
+ IMA Integrity measurement architecture is enabled.
+ IP_PNP IP DHCP, BOOTP, or RARP is enabled.
+ IPV6 IPv6 support is enabled.
+ ISAPNP ISA PnP code is enabled.
+ ISDN Appropriate ISDN support is enabled.
+ ISOL CPU Isolation is enabled.
+ JOY Appropriate joystick support is enabled.
+ KGDB Kernel debugger support is enabled.
+ KVM Kernel Virtual Machine support is enabled.
+ LIBATA Libata driver is enabled
+ LOONGARCH LoongArch architecture is enabled.
+ LOOP Loopback device support is enabled.
+ LP Printer support is enabled.
+ M68k M68k architecture is enabled.
+ These options have more detailed description inside of
+ Documentation/arch/m68k/kernel-options.rst.
+ MDA MDA console support is enabled.
+ MIPS MIPS architecture is enabled.
+ MOUSE Appropriate mouse support is enabled.
+ MSI Message Signaled Interrupts (PCI).
+ MTD MTD (Memory Technology Device) support is enabled.
+ NET Appropriate network support is enabled.
+ NFS Appropriate NFS support is enabled.
+ NUMA NUMA support is enabled.
+ OF Devicetree is enabled.
+ PARISC The PA-RISC architecture is enabled.
+ PCI PCI bus support is enabled.
+ PCIE PCI Express support is enabled.
+ PCMCIA The PCMCIA subsystem is enabled.
+ PNP Plug & Play support is enabled.
+ PPC PowerPC architecture is enabled.
+ PPT Parallel port support is enabled.
+ PS2 Appropriate PS/2 support is enabled.
+ PV_OPS A paravirtualized kernel is enabled.
+ RAM RAM disk support is enabled.
+ RDT Intel Resource Director Technology.
+ RISCV RISCV architecture is enabled.
+ S390 S390 architecture is enabled.
+ SCSI Appropriate SCSI support is enabled.
+ A lot of drivers have their options described inside
+ the Documentation/scsi/ sub-directory.
+ SDW SoundWire support is enabled.
+ SECURITY Different security models are enabled.
+ SELINUX SELinux support is enabled.
+ SERIAL Serial support is enabled.
+ SH SuperH architecture is enabled.
+ SMP The kernel is an SMP kernel.
+ SPARC Sparc architecture is enabled.
+ SUSPEND System suspend states are enabled.
+ SWSUSP Software suspend (hibernation) is enabled.
+ TPM TPM drivers are enabled.
+ UMS USB Mass Storage support is enabled.
+ USB USB support is enabled.
+ USBHID USB Human Interface Device support is enabled.
+ V4L Video For Linux support is enabled.
+ VGA The VGA console has been enabled.
+ VMMIO Driver for memory mapped virtio devices is enabled.
+ VT Virtual terminal support is enabled.
+ WDT Watchdog support is enabled.
+ X86-32 X86-32, aka i386 architecture is enabled.
+ X86-64 X86-64 architecture is enabled.
+ X86 Either 32-bit or 64-bit x86 (same as X86-32+X86-64)
+ X86_UV SGI UV support is enabled.
+ XEN Xen support is enabled
+ XTENSA xtensa architecture is enabled.
+
+In addition, the following text indicates that the option
+
+ BOOT Is a boot loader parameter.
+ BUGS= Relates to possible processor bugs on the said processor.
+ KNL Is a kernel start-up parameter.
+
+
+Kernel parameters
+
accept_memory= [MM]
Format: { eager | lazy }
default: lazy
@@ -9,10 +107,10 @@
accept_memory=eager can be used to accept all memory
at once during boot.
- acpi= [HW,ACPI,X86,ARM64,RISCV64]
+ acpi= [HW,ACPI,X86,ARM64,RISCV64,EARLY]
Advanced Configuration and Power Interface
Format: { force | on | off | strict | noirq | rsdt |
- copy_dsdt }
+ copy_dsdt | nospcr }
force -- enable ACPI if default was off
on -- enable ACPI but allow fallback to DT [arm64,riscv64]
off -- disable ACPI if default was on
@@ -21,12 +119,20 @@
strictly ACPI specification compliant.
rsdt -- prefer RSDT over (default) XSDT
copy_dsdt -- copy DSDT to memory
- For ARM64 and RISCV64, ONLY "acpi=off", "acpi=on" or
- "acpi=force" are available
+ nocmcff -- Disable firmware first mode for corrected
+ errors. This disables parsing the HEST CMC error
+ source to check if firmware has set the FF flag. This
+ may result in duplicate corrected error reports.
+ nospcr -- disable console in ACPI SPCR table as
+ default _serial_ console on ARM64
+ For ARM64, ONLY "acpi=off", "acpi=on", "acpi=force" or
+ "acpi=nospcr" are available
+ For RISCV64, ONLY "acpi=off", "acpi=on" or "acpi=force"
+ are available
See also Documentation/power/runtime_pm.rst, pci=noacpi
- acpi_apic_instance= [ACPI, IOAPIC]
+ acpi_apic_instance= [ACPI,IOAPIC,EARLY]
Format: <int>
2: use 2nd APIC table, if available
1,0: use 1st APIC table
@@ -41,7 +147,7 @@
If set to native, use the device's native backlight mode.
If set to none, disable the ACPI backlight interface.
- acpi_force_32bit_fadt_addr
+ acpi_force_32bit_fadt_addr [ACPI,EARLY]
force FADT to use 32 bit addresses rather than the
64 bit X_* addresses. Some firmware have broken 64
bit addresses for force ACPI ignore these and use
@@ -97,7 +203,7 @@
no: ACPI OperationRegions are not marked as reserved,
no further checks are performed.
- acpi_force_table_verification [HW,ACPI]
+ acpi_force_table_verification [HW,ACPI,EARLY]
Enable table checksum verification during early stage.
By default, this is disabled due to x86 early mapping
size limitation.
@@ -137,7 +243,7 @@
acpi_no_memhotplug [ACPI] Disable memory hotplug. Useful for kdump
kernels.
- acpi_no_static_ssdt [HW,ACPI]
+ acpi_no_static_ssdt [HW,ACPI,EARLY]
Disable installation of static SSDTs at early boot time
By default, SSDTs contained in the RSDT/XSDT will be
installed automatically and they will appear under
@@ -151,7 +257,7 @@
Ignore the ACPI-based watchdog interface (WDAT) and let
a native driver control the watchdog device instead.
- acpi_rsdp= [ACPI,EFI,KEXEC]
+ acpi_rsdp= [ACPI,EFI,KEXEC,EARLY]
Pass the RSDP address to the kernel, mostly used
on machines running EFI runtime service to boot the
second kernel for kdump.
@@ -228,10 +334,10 @@
to assume that this machine's pmtimer latches its value
and always returns good values.
- acpi_sci= [HW,ACPI] ACPI System Control Interrupt trigger mode
+ acpi_sci= [HW,ACPI,EARLY] ACPI System Control Interrupt trigger mode
Format: { level | edge | high | low }
- acpi_skip_timer_override [HW,ACPI]
+ acpi_skip_timer_override [HW,ACPI,EARLY]
Recognize and ignore IRQ0/pin2 Interrupt Override.
For broken nForce2 BIOS resulting in XT-PIC timer.
@@ -266,11 +372,11 @@
behave incorrectly in some ways with respect to system
suspend and resume to be ignored (use wisely).
- acpi_use_timer_override [HW,ACPI]
+ acpi_use_timer_override [HW,ACPI,EARLY]
Use timer override. For some broken Nvidia NF5 boards
that require a timer override, but don't have HPET
- add_efi_memmap [EFI; X86] Include EFI memory map in
+ add_efi_memmap [EFI,X86,EARLY] Include EFI memory map in
kernel's map of available physical RAM.
agp= [AGP]
@@ -307,7 +413,7 @@
do not want to use tracing_snapshot_alloc() as it needs
to be done where GFP_KERNEL allocations are allowed.
- allow_mismatched_32bit_el0 [ARM64]
+ allow_mismatched_32bit_el0 [ARM64,EARLY]
Allow execve() of 32-bit applications and setting of the
PER_LINUX32 personality on systems where only a strict
subset of the CPUs support 32-bit EL0. When this
@@ -329,12 +435,17 @@
allowed anymore to lift isolation
requirements as needed. This option
does not override iommu=pt
- force_enable - Force enable the IOMMU on platforms known
- to be buggy with IOMMU enabled. Use this
- option with care.
- pgtbl_v1 - Use v1 page table for DMA-API (Default).
- pgtbl_v2 - Use v2 page table for DMA-API.
- irtcachedis - Disable Interrupt Remapping Table (IRT) caching.
+ force_enable - Force enable the IOMMU on platforms known
+ to be buggy with IOMMU enabled. Use this
+ option with care.
+ pgtbl_v1 - Use v1 page table for DMA-API (Default).
+ pgtbl_v2 - Use v2 page table for DMA-API.
+ irtcachedis - Disable Interrupt Remapping Table (IRT) caching.
+ nohugepages - Limit page-sizes used for v1 page-tables
+ to 4 KiB.
+ v2_pgsizes_only - Limit page-sizes used for v1 page-tables
+ to 4KiB/2Mib/1GiB.
+
amd_iommu_dump= [HW,X86-64]
Enable AMD IOMMU driver option to dump the ACPI table
@@ -351,7 +462,7 @@
This mode requires kvm-amd.avic=1.
(Default when IOMMU HW support is present.)
- amd_pstate= [X86]
+ amd_pstate= [X86,EARLY]
disable
Do not enable amd_pstate as the default
scaling driver for the supported processors
@@ -374,6 +485,11 @@
selects a performance level in this range and appropriate
to the current workload.
+ amd_prefcore=
+ [X86]
+ disable
+ Disable amd-pstate preferred core.
+
amijoy.map= [HW,JOY] Amiga joystick support
Map of devices attached to JOY0DAT and JOY1DAT
Format: <a>,<b>
@@ -391,17 +507,15 @@
not play well with APC CPU idle - disable it if you have
APC and your system crashes randomly.
- apic= [APIC,X86] Advanced Programmable Interrupt Controller
+ apic [APIC,X86-64] Use IO-APIC. Default.
+
+ apic= [APIC,X86,EARLY] Advanced Programmable Interrupt Controller
Change the output verbosity while booting
Format: { quiet (default) | verbose | debug }
Change the amount of debugging information output
when initialising the APIC and IO-APIC components.
- For X86-32, this can also be used to specify an APIC
- driver name.
- Format: apic=driver_name
- Examples: apic=bigsmp
- apic_extnmi= [APIC,X86] External NMI delivery setting
+ apic_extnmi= [APIC,X86,EARLY] External NMI delivery setting
Format: { bsp (default) | all | none }
bsp: External NMI is delivered only to CPU 0
all: External NMIs are broadcast to all CPUs as a
@@ -410,6 +524,10 @@
useful so that a dump capture kernel won't be
shot down by NMI
+ apicpmtimer Do APIC timer calibration using the pmtimer. Implies
+ apicmaintimer. Useful when your PIT timer is totally
+ broken.
+
autoconf= [IPV6]
See Documentation/networking/ipv6.rst.
@@ -426,12 +544,21 @@
arcrimi= [HW,NET] ARCnet - "RIM I" (entirely mem-mapped) cards
Format: <io>,<irq>,<nodeID>
+ arm64.no32bit_el0 [ARM64] Unconditionally disable the execution of
+ 32 bit applications.
+
arm64.nobti [ARM64] Unconditionally disable Branch Target
Identification support
+ arm64.nogcs [ARM64] Unconditionally disable Guarded Control Stack
+ support
+
arm64.nomops [ARM64] Unconditionally disable Memory Copy and Memory
Set instructions support
+ arm64.nompam [ARM64] Unconditionally disable Memory Partitioning And
+ Monitoring support
+
arm64.nomte [ARM64] Unconditionally disable Memory Tagging Extension
support
@@ -505,24 +632,37 @@
Format: <io>,<irq>,<mode>
See header of drivers/net/hamradio/baycom_ser_hdx.c.
+ bdev_allow_write_mounted=
+ Format: <bool>
+ Control the ability to open a mounted block device
+ for writing, i.e., allow / disallow writes that bypass
+ the FS. This was implemented as a means to prevent
+ fuzzers from crashing the kernel by overwriting the
+ metadata underneath a mounted FS without its awareness.
+ This also prevents destructive formatting of mounted
+ filesystems by naive storage tooling that don't use
+ O_EXCL. Default is Y and can be changed through the
+ Kconfig option CONFIG_BLK_DEV_WRITE_MOUNTED.
+
bert_disable [ACPI]
Disable BERT OS support on buggy BIOSes.
- bgrt_disable [ACPI][X86]
+ bgrt_disable [ACPI,X86,EARLY]
Disable BGRT to avoid flickering OEM logo.
blkdevparts= Manual partition parsing of block device(s) for
embedded devices based on command line input.
See Documentation/block/cmdline-partition.rst
- boot_delay= Milliseconds to delay each printk during boot.
+ boot_delay= [KNL,EARLY]
+ Milliseconds to delay each printk during boot.
Only works if CONFIG_BOOT_PRINTK_DELAY is enabled,
and you may also have to specify "lpj=". Boot_delay
values larger than 10 seconds (10000) are assumed
erroneous and ignored.
Format: integer
- bootconfig [KNL]
+ bootconfig [KNL,EARLY]
Extended command line options can be added to an initrd
and this will cause the kernel to look for it.
@@ -557,7 +697,7 @@
trust validation.
format: { id:<keyid> | builtin }
- cca= [MIPS] Override the kernel pages' cache coherency
+ cca= [MIPS,EARLY] Override the kernel pages' cache coherency
algorithm. Accepted values range from 0 to 7
inclusive. See arch/mips/include/asm/pgtable-bits.h
for platform specific values (SB1, Loongson3 and
@@ -566,6 +706,24 @@
ccw_timeout_log [S390]
See Documentation/arch/s390/common_io.rst for details.
+ cfi= [X86-64] Set Control Flow Integrity checking features
+ when CONFIG_FINEIBT is enabled.
+ Format: feature[,feature...]
+ Default: auto
+
+ auto: Use FineIBT if IBT available, otherwise kCFI.
+ Under FineIBT, enable "paranoid" mode when
+ FRED is not available.
+ off: Turn off CFI checking.
+ kcfi: Use kCFI (disable FineIBT).
+ fineibt: Use FineIBT (even if IBT not available).
+ norand: Do not re-randomize CFI hashes.
+ paranoid: Add caller hash checking under FineIBT.
+ bhi: Enable register poisoning to stop speculation
+ across FineIBT. (Disabled by default.)
+ warn: Do not enforce CFI checking: warn only.
+ debug: Report CFI initialization details.
+
cgroup_disable= [KNL] Disable a particular controller or optional feature
Format: {name of the controller(s) or feature(s) to disable}
The effects of cgroup_disable=foo are:
@@ -591,6 +749,14 @@
named mounts. Specifying both "all" and "named" disables
all v1 hierarchies.
+ cgroup_v1_proc= [KNL] Show also missing controllers in /proc/cgroups
+ Format: { "true" | "false" }
+ /proc/cgroups lists only v1 controllers by default.
+ This compatibility option enables listing also v2
+ controllers (whose v1 code is not compiled!), so that
+ semi-legacy software can check this file to decide
+ about usage of v2 (sic) controllers.
+
cgroup_favordynmods= [KNL] Enable or Disable favordynmods.
Format: { "true" | "false" }
Defaults to the value of CONFIG_CGROUP_FAVOR_DYNMODS.
@@ -601,6 +767,14 @@
nokmem -- Disable kernel memory accounting.
nobpf -- Disable BPF memory accounting.
+ check_pages= [MM,EARLY] Enable sanity checking of pages after
+ allocations / before freeing. This adds checks to catch
+ double-frees, use-after-frees, and other sources of
+ page corruption by inspecting page internals (flags,
+ mapcount/refcount, memcg_data, etc.).
+ Format: { "0" | "1" }
+ Default: 0 (1 if CONFIG_DEBUG_VM is set)
+
checkreqprot= [SELINUX] Set initial checkreqprot flag value.
Format: { "0" | "1" }
See security/selinux/Kconfig help text.
@@ -672,19 +846,13 @@
[X86-64] hpet,tsc
clocksource.arm_arch_timer.evtstrm=
- [ARM,ARM64]
+ [ARM,ARM64,EARLY]
Format: <bool>
Enable/disable the eventstream feature of the ARM
architected timer so that code using WFE-based polling
loops can be debugged more effectively on production
systems.
- clocksource.max_cswd_read_retries= [KNL]
- Number of clocksource_watchdog() retries due to
- external delays before the clock will be marked
- unstable. Defaults to two retries, that is,
- three attempts to read the clock under test.
-
clocksource.verify_n_cpus= [KNL]
Limit the number of CPUs checked for clocksources
marked with CLOCK_SOURCE_VERIFY_PERCPU that
@@ -702,7 +870,7 @@
10 seconds when built into the kernel.
cma=nn[MG]@[start[MG][-end[MG]]]
- [KNL,CMA]
+ [KNL,CMA,EARLY]
Sets the size of kernel global memory area for
contiguous memory allocations and optionally the
placement constraint by the physical address range of
@@ -711,7 +879,7 @@
kernel/dma/contiguous.c
cma_pernuma=nn[MG]
- [KNL,CMA]
+ [KNL,CMA,EARLY]
Sets the size of kernel per-numa memory area for
contiguous memory allocations. A value of 0 disables
per-numa CMA altogether. And If this option is not
@@ -722,7 +890,7 @@
they will fallback to the global default memory area.
numa_cma=<node>:nn[MG][,<node>:nn[MG]]
- [KNL,CMA]
+ [KNL,CMA,EARLY]
Sets the size of kernel numa memory area for
contiguous memory allocations. It will reserve CMA
area for the specified node.
@@ -739,7 +907,7 @@
a hypervisor.
Default: yes
- coherent_pool=nn[KMG] [ARM,KNL]
+ coherent_pool=nn[KMG] [ARM,KNL,EARLY]
Sets the size of memory pool for coherent, atomic dma
allocations, by default set to 256K.
@@ -757,7 +925,7 @@
condev= [HW,S390] console device
conmode=
- con3215_drop= [S390] 3215 console drop mode.
+ con3215_drop= [S390,EARLY] 3215 console drop mode.
Format: y|n|Y|N|1|0
When set to true, drop data on the 3215 console when
the console buffer is full. In this case the
@@ -785,6 +953,25 @@
Documentation/networking/netconsole.rst for an
alternative.
+ <DEVNAME>:<n>.<n>[,options]
+ Use the specified serial port on the serial core bus.
+ The addressing uses DEVNAME of the physical serial port
+ device, followed by the serial core controller instance,
+ and the serial port instance. The options are the same
+ as documented for the ttyS addressing above.
+
+ The mapping of the serial ports to the tty instances
+ can be viewed with:
+
+ $ ls -d /sys/bus/serial-base/devices/*:*.*/tty/*
+ /sys/bus/serial-base/devices/00:04:0.0/tty/ttyS0
+
+ In the above example, the console can be addressed with
+ console=00:04:0.0. Note that a console addressed this
+ way will only get added when the related device driver
+ is ready. The use of an earlycon parameter in addition to
+ the console may be desired for console output early on.
+
uart[8250],io,<addr>[,options]
uart[8250],mmio,<addr>[,options]
uart[8250],mmio16,<addr>[,options]
@@ -863,7 +1050,7 @@
kernel before the cpufreq driver probes.
cpu_init_udelay=N
- [X86] Delay for N microsec between assert and de-assert
+ [X86,EARLY] Delay for N microsec between assert and de-assert
of APIC INIT to start processors. This delay occurs
on every CPU online, such as boot, and resume from suspend.
Default: 10000
@@ -875,15 +1062,19 @@
the parameter has no effect.
crash_kexec_post_notifiers
- Run kdump after running panic-notifiers and dumping
- kmsg. This only for the users who doubt kdump always
- succeeds in any situation.
- Note that this also increases risks of kdump failure,
- because some panic notifiers can make the crashed
- kernel more unstable.
+ Only jump to kdump kernel after running the panic
+ notifiers and dumping kmsg. This option increases
+ the risks of a kdump failure, since some panic
+ notifiers can make the crashed kernel more unstable.
+ In configurations where kdump may not be reliable,
+ running the panic notifiers could allow collecting
+ more data on dmesg, like stack traces from other CPUS
+ or extra data dumped by panic_print. Note that some
+ configurations enable this option unconditionally,
+ like Hyper-V, PowerPC (fadump) and AMD SEV-SNP.
crashkernel=size[KMG][@offset[KMG]]
- [KNL] Using kexec, Linux can switch to a 'crash kernel'
+ [KNL,EARLY] Using kexec, Linux can switch to a 'crash kernel'
upon panic. This parameter reserves the physical
memory region [offset, offset + size] for that kernel
image. If '@offset' is omitted, then a suitable offset
@@ -927,6 +1118,28 @@
0: to disable low allocation.
It will be ignored when crashkernel=X,high is not used
or memory reserved is below 4G.
+ crashkernel=size[KMG],cma
+ [KNL, X86, ppc] Reserve additional crash kernel memory from
+ CMA. This reservation is usable by the first system's
+ userspace memory and kernel movable allocations (memory
+ balloon, zswap). Pages allocated from this memory range
+ will not be included in the vmcore so this should not
+ be used if dumping of userspace memory is intended and
+ it has to be expected that some movable kernel pages
+ may be missing from the dump.
+
+ A standard crashkernel reservation, as described above,
+ is still needed to hold the crash kernel and initrd.
+
+ This option increases the risk of a kdump failure: DMA
+ transfers configured by the first kernel may end up
+ corrupting the second kernel's memory.
+
+ This reservation method is intended for systems that
+ can't afford to sacrifice enough memory for standard
+ crashkernel reservation and where less reliable and
+ possibly incomplete kdump is preferable to no kdump at
+ all.
cryptomgr.notests
[KNL] Disable crypto self-tests
@@ -954,10 +1167,10 @@
Format: <port#>,<type>
See also Documentation/input/devices/joystick-parport.rst
- debug [KNL] Enable kernel debugging (events log level).
+ debug [KNL,EARLY] Enable kernel debugging (events log level).
debug_boot_weak_hash
- [KNL] Enable printing [hashed] pointers early in the
+ [KNL,EARLY] Enable printing [hashed] pointers early in the
boot sequence. If enabled, we use a weak hash instead
of siphash to hash pointers. Use this option if you are
seeing instances of '(___ptrval___)') and need to see a
@@ -974,10 +1187,10 @@
will print _a_lot_ more information - normally only
useful to lockdep developers.
- debug_objects [KNL] Enable object debugging
+ debug_objects [KNL,EARLY] Enable object debugging
debug_guardpage_minorder=
- [KNL] When CONFIG_DEBUG_PAGEALLOC is set, this
+ [KNL,EARLY] When CONFIG_DEBUG_PAGEALLOC is set, this
parameter allows control of the order of pages that will
be intentionally kept free (and hence protected) by the
buddy allocator. Bigger value increase the probability
@@ -996,7 +1209,7 @@
help tracking down these problems.
debug_pagealloc=
- [KNL] When CONFIG_DEBUG_PAGEALLOC is set, this parameter
+ [KNL,EARLY] When CONFIG_DEBUG_PAGEALLOC is set, this parameter
enables the feature at boot time. By default, it is
disabled and the system will work mostly the same as a
kernel built without CONFIG_DEBUG_PAGEALLOC.
@@ -1004,14 +1217,10 @@
useful to also enable the page_owner functionality.
on: enable the feature
- debugfs= [KNL] This parameter enables what is exposed to userspace
- and debugfs internal clients.
- Format: { on, no-mount, off }
+ debugfs= [KNL,EARLY] This parameter enables what is exposed to
+ userspace and debugfs internal clients.
+ Format: { on, off }
on: All functions are enabled.
- no-mount:
- Filesystem is not registered but kernel clients can
- access APIs and a crashkernel can be used to read
- its content. There is nothing to mount.
off: Filesystem is not registered and clients
get a -EPERM as result when trying to register files
or directories within debugfs.
@@ -1084,7 +1293,7 @@
dhash_entries= [KNL]
Set number of hash buckets for dentry cache.
- disable_1tb_segments [PPC]
+ disable_1tb_segments [PPC,EARLY]
Disables the use of 1TB hash page table segments. This
causes the kernel to fall back to 256MB segments which
can be useful when debugging issues that require an SLB
@@ -1093,41 +1302,32 @@
disable= [IPV6]
See Documentation/networking/ipv6.rst.
- disable_radix [PPC]
+ disable_radix [PPC,EARLY]
Disable RADIX MMU mode on POWER9
disable_tlbie [PPC]
Disable TLBIE instruction. Currently does not work
with KVM, with HASH MMU, or with coherent accelerators.
- disable_cpu_apicid= [X86,APIC,SMP]
- Format: <int>
- The number of initial APIC ID for the
- corresponding CPU to be disabled at boot,
- mostly used for the kdump 2nd kernel to
- disable BSP to wake up multiple CPUs without
- causing system reset or hang due to sending
- INIT from AP to BSP.
-
- disable_ddw [PPC/PSERIES]
+ disable_ddw [PPC/PSERIES,EARLY]
Disable Dynamic DMA Window support. Use this
to workaround buggy firmware.
disable_ipv6= [IPV6]
See Documentation/networking/ipv6.rst.
- disable_mtrr_cleanup [X86]
+ disable_mtrr_cleanup [X86,EARLY]
The kernel tries to adjust MTRR layout from continuous
to discrete, to make X server driver able to add WB
entry later. This parameter disables that.
- disable_mtrr_trim [X86, Intel and AMD only]
+ disable_mtrr_trim [X86, Intel and AMD only,EARLY]
By default the kernel will trim any uncacheable
memory out of your available memory pool based on
MTRR settings. This parameter disables that behavior,
possibly causing your machine to run very slowly.
- disable_timer_pin_1 [X86]
+ disable_timer_pin_1 [X86,EARLY]
Disable PIN 1 of APIC timer
Can be useful to work around chipset bugs.
@@ -1150,6 +1350,26 @@
The filter can be disabled or changed to another
driver later using sysfs.
+ reg_file_data_sampling=
+ [X86] Controls mitigation for Register File Data
+ Sampling (RFDS) vulnerability. RFDS is a CPU
+ vulnerability which may allow userspace to infer
+ kernel data values previously stored in floating point
+ registers, vector registers, or integer registers.
+ RFDS only affects Intel Atom processors.
+
+ on: Turns ON the mitigation.
+ off: Turns OFF the mitigation.
+
+ This parameter overrides the compile time default set
+ by CONFIG_MITIGATION_RFDS. Mitigation cannot be
+ disabled when other VERW based mitigations (like MDS)
+ are enabled. In order to disable RFDS mitigation all
+ VERW based mitigations need to be disabled.
+
+ For details see:
+ Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst
+
driver_async_probe= [KNL]
List of driver names to be probed asynchronously. *
matches with all driver names. If * is specified, the
@@ -1162,22 +1382,16 @@
panels may send no or incorrect EDID data sets.
This parameter allows to specify an EDID data sets
in the /lib/firmware directory that are used instead.
- Generic built-in EDID data sets are used, if one of
- edid/1024x768.bin, edid/1280x1024.bin,
- edid/1680x1050.bin, or edid/1920x1080.bin is given
- and no file with the same name exists. Details and
- instructions how to build your own EDID data are
- available in Documentation/admin-guide/edid.rst. An EDID
- data set will only be used for a particular connector,
- if its name and a colon are prepended to the EDID
- name. Each connector may use a unique EDID data
- set by separating the files with a comma. An EDID
+ An EDID data set will only be used for a particular
+ connector, if its name and a colon are prepended to
+ the EDID name. Each connector may use a unique EDID
+ data set by separating the files with a comma. An EDID
data set with no connector name will be used for
any connectors not explicitly specified.
dscc4.setup= [NET]
- dt_cpu_ftrs= [PPC]
+ dt_cpu_ftrs= [PPC,EARLY]
Format: {"off" | "known"}
Control how the dt_cpu_ftrs device-tree binding is
used for CPU feature discovery and setup (if it
@@ -1197,12 +1411,12 @@
Documentation/admin-guide/dynamic-debug-howto.rst
for details.
- early_ioremap_debug [KNL]
+ early_ioremap_debug [KNL,EARLY]
Enable debug messages in early_ioremap support. This
is useful for tracking down temporary early mappings
which are not unmapped.
- earlycon= [KNL] Output early console device and options.
+ earlycon= [KNL,EARLY] Output early console device and options.
When used with no options, the early console is
determined by stdout-path property in device tree's
@@ -1338,7 +1552,7 @@
address must be provided, and the serial port must
already be setup and configured.
- earlyprintk= [X86,SH,ARM,M68k,S390]
+ earlyprintk= [X86,SH,ARM,M68k,S390,UM,EARLY]
earlyprintk=vga
earlyprintk=sclp
earlyprintk=xen
@@ -1346,7 +1560,8 @@
earlyprintk=serial[,0x...[,baudrate]]
earlyprintk=ttySn[,baudrate]
earlyprintk=dbgp[debugController#]
- earlyprintk=pciserial[,force],bus:device.function[,baudrate]
+ earlyprintk=mmio32,membase[,{nocfg|baudrate}]
+ earlyprintk=pciserial[,force],bus:device.function[,{nocfg|baudrate}]
earlyprintk=xdbc[xhciController#]
earlyprintk=bios
@@ -1354,6 +1569,9 @@
the normal console is initialized. It is not enabled by
default because it has some cosmetic problems.
+ Use "nocfg" to skip UART configuration, assume
+ BIOS/firmware has configured UART correctly.
+
Append ",keep" to not disable it when the real console
takes over.
@@ -1396,7 +1614,7 @@
edd= [EDD]
Format: {"off" | "on" | "skip[mbr]"}
- efi= [EFI]
+ efi= [EFI,EARLY]
Format: { "debug", "disable_early_pci_dma",
"nochunk", "noruntime", "nosoftreserve",
"novamap", "no_disable_early_pci_dma" }
@@ -1417,33 +1635,12 @@
no_disable_early_pci_dma: Leave the busmaster bit set
on all PCI bridges while in the EFI boot stub
- efi_no_storage_paranoia [EFI; X86]
+ efi_no_storage_paranoia [EFI,X86,EARLY]
Using this parameter you can use more than 50% of
your efi variable storage. Use this parameter only if
you are really sure that your UEFI does sane gc and
fulfills the spec otherwise your board may brick.
- efi_fake_mem= nn[KMG]@ss[KMG]:aa[,nn[KMG]@ss[KMG]:aa,..] [EFI; X86]
- Add arbitrary attribute to specific memory range by
- updating original EFI memory map.
- Region of memory which aa attribute is added to is
- from ss to ss+nn.
-
- If efi_fake_mem=2G@4G:0x10000,2G@0x10a0000000:0x10000
- is specified, EFI_MEMORY_MORE_RELIABLE(0x10000)
- attribute is added to range 0x100000000-0x180000000 and
- 0x10a0000000-0x1120000000.
-
- If efi_fake_mem=8G@9G:0x40000 is specified, the
- EFI_MEMORY_SP(0x40000) attribute is added to
- range 0x240000000-0x43fffffff.
-
- Using this parameter you can do debugging of EFI memmap
- related features. For example, you can do debugging of
- Address Range Mirroring feature even if your box
- doesn't support it, or mark specific memory as
- "soft reserved".
-
efivar_ssdt= [EFI; X86] Name of an EFI variable that contains an SSDT
that is to be dynamically loaded by Linux. If there are
multiple variables with the same name but with different
@@ -1454,7 +1651,7 @@
eisa_irq_edge= [PARISC,HW]
See header of drivers/parisc/eisa.c.
- ekgdboc= [X86,KGDB] Allow early kernel console debugging
+ ekgdboc= [X86,KGDB,EARLY] Allow early kernel console debugging
Format: ekgdboc=kbd
This is designed to be used in conjunction with
@@ -1469,13 +1666,13 @@
See comment before function elanfreq_setup() in
arch/x86/kernel/cpu/cpufreq/elanfreq.c.
- elfcorehdr=[size[KMG]@]offset[KMG] [PPC,SH,X86,S390]
+ elfcorehdr=[size[KMG]@]offset[KMG] [PPC,SH,X86,S390,EARLY]
Specifies physical address of start of kernel core
image elf header and optionally the size. Generally
kexec loader will pass this option to capture kernel.
See Documentation/admin-guide/kdump/kdump.rst for details.
- enable_mtrr_cleanup [X86]
+ enable_mtrr_cleanup [X86,EARLY]
The kernel tries to adjust MTRR layout from continuous
to discrete, to make X server driver able to add WB
entry later. This parameter enables that.
@@ -1508,7 +1705,7 @@
Permit 'security.evm' to be updated regardless of
current integrity status.
- early_page_ext [KNL] Enforces page_ext initialization to earlier
+ early_page_ext [KNL,EARLY] Enforces page_ext initialization to earlier
stages so cover more early boot allocations.
Please note that as side effect some optimizations
might be disabled to achieve that (e.g. parallelized
@@ -1519,6 +1716,7 @@
failslab=
fail_usercopy=
fail_page_alloc=
+ fail_skb_realloc=
fail_make_request=[KNL]
General fault injection mechanism.
Format: <interval>,<probability>,<space>,<times>
@@ -1539,6 +1737,12 @@
Warning: use of this parameter will taint the kernel
and may cause unknown problems.
+ fred= [X86-64]
+ Enable/disable Flexible Return and Event Delivery.
+ Format: { on | off }
+ on: enable FRED when it's present.
+ off: disable FRED, the default setting.
+
ftrace=[tracer]
[FTRACE] will set and start the specified tracer
as early as possible in order to facilitate early
@@ -1561,12 +1765,28 @@
The above will cause the "foo" tracing instance to trigger
a snapshot at the end of boot up.
- ftrace_dump_on_oops[=orig_cpu]
+ ftrace_dump_on_oops[=2(orig_cpu) | =<instance>][,<instance> |
+ ,<instance>=2(orig_cpu)]
[FTRACE] will dump the trace buffers on oops.
- If no parameter is passed, ftrace will dump
- buffers of all CPUs, but if you pass orig_cpu, it will
- dump only the buffer of the CPU that triggered the
- oops.
+ If no parameter is passed, ftrace will dump global
+ buffers of all CPUs, if you pass 2 or orig_cpu, it
+ will dump only the buffer of the CPU that triggered
+ the oops, or the specific instance will be dumped if
+ its name is passed. Multiple instance dump is also
+ supported, and instances are separated by commas. Each
+ instance supports only dump on CPU that triggered the
+ oops by passing 2 or orig_cpu to it.
+
+ ftrace_dump_on_oops=foo=orig_cpu
+
+ The above will dump only the buffer of "foo" instance
+ on CPU that triggered the oops.
+
+ ftrace_dump_on_oops,foo,bar=orig_cpu
+
+ The above will dump global buffer on all CPUs, the
+ buffer of "foo" instance on all CPUs and the buffer
+ of "bar" instance on CPU that triggered the oops.
ftrace_filter=[function-list]
[FTRACE] Limit the functions traced by the function
@@ -1600,7 +1820,7 @@
can be changed at run time by the max_graph_depth file
in the tracefs tracing directory. default: 0 (no limit)
- fw_devlink= [KNL] Create device links between consumer and supplier
+ fw_devlink= [KNL,EARLY] Create device links between consumer and supplier
devices by scanning the firmware to infer the
consumer/supplier relationships. This feature is
especially useful when drivers are loaded as modules as
@@ -1619,12 +1839,12 @@
rpm -- Like "on", but also use to order runtime PM.
fw_devlink.strict=<bool>
- [KNL] Treat all inferred dependencies as mandatory
+ [KNL,EARLY] Treat all inferred dependencies as mandatory
dependencies. This only applies for fw_devlink=on|rpm.
Format: <bool>
fw_devlink.sync_state =
- [KNL] When all devices that could probe have finished
+ [KNL,EARLY] When all devices that could probe have finished
probing, this parameter controls what to do with
devices that haven't yet received their sync_state()
calls.
@@ -1645,12 +1865,12 @@
gamma= [HW,DRM]
- gart_fix_e820= [X86-64] disable the fix e820 for K8 GART
+ gart_fix_e820= [X86-64,EARLY] disable the fix e820 for K8 GART
Format: off | on
default: on
gather_data_sampling=
- [X86,INTEL] Control the Gather Data Sampling (GDS)
+ [X86,INTEL,EARLY] Control the Gather Data Sampling (GDS)
mitigation.
Gather Data Sampling is a hardware vulnerability which
@@ -1669,6 +1889,8 @@
off: Disable GDS mitigation.
+ gbpages [X86] Use GB pages for kernel direct mappings.
+
gcov_persist= [GCOV] When non-zero (default), profiling data for
kernel modules is saved and remains accessible via
debugfs, even when the module is unloaded/reloaded.
@@ -1716,7 +1938,9 @@
allocation boundaries as a proactive defense
against bounds-checking flaws in the kernel's
copy_to_user()/copy_from_user() interface.
- on Perform hardened usercopy checks (default).
+ The default is determined by
+ CONFIG_HARDENED_USERCOPY_DEFAULT_ON.
+ on Perform hardened usercopy checks.
off Disable hardened usercopy checks.
hardlockup_all_cpu_backtrace=
@@ -1724,13 +1948,32 @@
backtraces on all cpus.
Format: 0 | 1
+ hash_pointers=
+ [KNL,EARLY]
+ By default, when pointers are printed to the console
+ or buffers via the %p format string, that pointer is
+ "hashed", i.e. obscured by hashing the pointer value.
+ This is a security feature that hides actual kernel
+ addresses from unprivileged users, but it also makes
+ debugging the kernel more difficult since unequal
+ pointers can no longer be compared. The choices are:
+ Format: { auto | always | never }
+ Default: auto
+
+ auto - Hash pointers unless slab_debug is enabled.
+ always - Always hash pointers (even if slab_debug is
+ enabled).
+ never - Never hash pointers. This option should only
+ be specified when debugging the kernel. Do
+ not use on production kernels. The boot
+ param "no_hash_pointers" is an alias for
+ this mode.
+
hashdist= [KNL,NUMA] Large hashes allocated during boot
are distributed across NUMA nodes. Defaults on
for 64-bit NUMA, off otherwise.
Format: 0 | 1 (for off | on)
- hcl= [IA-64] SGI's Hardware Graph compatibility layer
-
hd= [EIDE] (E)IDE hard drive subsystem geometry
Format: <cyl>,<head>,<sect>
@@ -1748,7 +1991,35 @@
(that will set all pages holding image data
during restoration read-only).
- highmem=nn[KMG] [KNL,BOOT] forces the highmem zone to have an exact
+ hibernate.compressor= [HIBERNATION] Compression algorithm to be
+ used with hibernation.
+ Format: { lzo | lz4 }
+ Default: lzo
+
+ lzo: Select LZO compression algorithm to
+ compress/decompress hibernation image.
+
+ lz4: Select LZ4 compression algorithm to
+ compress/decompress hibernation image.
+
+ hibernate.pm_test_delay=
+ [HIBERNATION]
+ Sets the number of seconds to remain in a hibernation test
+ mode before resuming the system (see
+ /sys/power/pm_test). Only available when CONFIG_PM_DEBUG
+ is set. Default value is 5.
+
+ hibernate_compression_threads=
+ [HIBERNATION]
+ Set the number of threads used for compressing or decompressing
+ hibernation images.
+
+ Format: <integer>
+ Default: 3
+ Minimum: 1
+ Example: hibernate_compression_threads=4
+
+ highmem=nn[KMG] [KNL,BOOT,EARLY] forces the highmem zone to have an exact
size of <nn>. This works even on boxes that have no
highmem otherwise. This also works to reduce highmem
size on bigger boxes.
@@ -1759,7 +2030,7 @@
hlt [BUGS=ARM,SH]
- hostname= [KNL] Set the hostname (aka UTS nodename).
+ hostname= [KNL,EARLY] Set the hostname (aka UTS nodename).
Format: <string>
This allows setting the system's hostname during early
startup. This sets the name returned by gethostname.
@@ -1783,7 +2054,7 @@
hpet_mmap= [X86, HPET_MMAP] Allow userspace to mmap HPET
registers. Default set by CONFIG_HPET_MMAP_DEFAULT.
- hugepages= [HW] Number of HugeTLB pages to allocate at boot.
+ hugepages= [HW,EARLY] Number of HugeTLB pages to allocate at boot.
If this follows hugepagesz (below), it specifies
the number of pages of hugepagesz to be allocated.
If this is the first HugeTLB parameter on the command
@@ -1795,16 +2066,25 @@
<node>:<integer>[,<node>:<integer>]
hugepagesz=
- [HW] The size of the HugeTLB pages. This is used in
- conjunction with hugepages (above) to allocate huge
- pages of a specific size at boot. The pair
- hugepagesz=X hugepages=Y can be specified once for
- each supported huge page size. Huge page sizes are
- architecture dependent. See also
+ [HW,EARLY] The size of the HugeTLB pages. This is
+ used in conjunction with hugepages (above) to
+ allocate huge pages of a specific size at boot. The
+ pair hugepagesz=X hugepages=Y can be specified once
+ for each supported huge page size. Huge page sizes
+ are architecture dependent. See also
Documentation/admin-guide/mm/hugetlbpage.rst.
Format: size[KMG]
- hugetlb_cma= [HW,CMA] The size of a CMA area used for allocation
+ hugepage_alloc_threads=
+ [HW] The number of threads that should be used to
+ allocate hugepages during boot. This option can be
+ used to improve system bootup time when allocating
+ a large amount of huge pages.
+ The default value is 25% of the available hardware threads.
+
+ Note that this parameter only applies to non-gigantic huge pages.
+
+ hugetlb_cma= [HW,CMA,EARLY] The size of a CMA area used for allocation
of gigantic hugepages. Or using node format, the size
of a CMA area per node can be specified.
Format: nn[KMGTPE] or (node format)
@@ -1814,6 +2094,13 @@
hugepages using the CMA allocator. If enabled, the
boot-time allocation of gigantic hugepages is skipped.
+ hugetlb_cma_only=
+ [HW,CMA,EARLY] When allocating new HugeTLB pages, only
+ try to allocate from the CMA areas.
+
+ This option does nothing if hugetlb_cma= is not also
+ specified.
+
hugetlb_free_vmemmap=
[KNL] Requires CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
enabled.
@@ -1835,14 +2122,20 @@
the added memory block itself do not be affected.
hung_task_panic=
- [KNL] Should the hung task detector generate panics.
- Format: 0 | 1
+ [KNL] Number of hung tasks to trigger kernel panic.
+ Format: <int>
+
+ When set to a non-zero value, a kernel panic will be triggered if
+ the number of detected hung tasks reaches this value.
- A value of 1 instructs the kernel to panic when a
- hung task is detected. The default value is controlled
- by the CONFIG_BOOTPARAM_HUNG_TASK_PANIC build-time
- option. The value selected by this boot parameter can
- be changed later by the kernel.hung_task_panic sysctl.
+ 0: don't panic
+ 1: panic immediately on first hung task
+ N: panic after N hung tasks are detected in a single scan
+
+ The default value is controlled by the
+ CONFIG_BOOTPARAM_HUNG_TASK_PANIC build-time option. The value
+ selected by this boot parameter can be changed later by the
+ kernel.hung_task_panic sysctl.
hvc_iucv= [S390] Number of z/VM IUCV hypervisor console (HVC)
terminal devices. Valid values: 0..8
@@ -1850,9 +2143,16 @@
If specified, z/VM IUCV HVC accepts connections
from listed z/VM user IDs only.
- hv_nopvspin [X86,HYPER_V] Disables the paravirt spinlock optimizations
- which allow the hypervisor to 'idle' the
- guest on lock contention.
+ hv_nopvspin [X86,HYPER_V,EARLY]
+ Disables the paravirt spinlock optimizations
+ which allow the hypervisor to 'idle' the guest
+ on lock contention.
+
+ hw_protection= [HW]
+ Format: reboot | shutdown
+
+ Hardware protection action taken on critical events like
+ overtemperature or imminent voltage loss.
i2c_bus= [HW] Override the default board specific I2C bus speed
or register an additional I2C bus that is not
@@ -1860,6 +2160,28 @@
Format:
<bus_id>,<clkrate>
+ i2c_touchscreen_props= [HW,ACPI,X86]
+ Set device-properties for ACPI-enumerated I2C-attached
+ touchscreen, to e.g. fix coordinates of upside-down
+ mounted touchscreens. If you need this option please
+ submit a drivers/platform/x86/touchscreen_dmi.c patch
+ adding a DMI quirk for this.
+
+ Format:
+ <ACPI_HW_ID>:<prop_name>=<val>[:prop_name=val][:...]
+ Where <val> is one of:
+ Omit "=<val>" entirely Set a boolean device-property
+ Unsigned number Set a u32 device-property
+ Anything else Set a string device-property
+
+ Examples (split over multiple lines):
+ i2c_touchscreen_props=GDIX1001:touchscreen-inverted-x:
+ touchscreen-inverted-y
+
+ i2c_touchscreen_props=MSSL1680:touchscreen-size-x=1920:
+ touchscreen-size-y=1080:touchscreen-inverted-y:
+ firmware-name=gsl1680-vendor-model.fw:silead,home-button
+
i8042.debug [HW] Toggle i8042 debug mode
i8042.unmask_kbd_data
[HW] Enable printing of interrupt data from the KBD port
@@ -1917,14 +2239,23 @@
Format: <io>[,<membase>[,<icn_id>[,<icn_id2>]]]
- idle= [X86]
+ idle= [X86,EARLY]
Format: idle=poll, idle=halt, idle=nomwait
- Poll forces a polling idle loop that can slightly
- improve the performance of waking up a idle CPU, but
- will use a lot of power and make the system run hot.
- Not recommended.
+
+ idle=poll: Don't do power saving in the idle loop
+ using HLT, but poll for rescheduling event. This will
+ make the CPUs eat a lot more power, but may be useful
+ to get slightly better performance in multiprocessor
+ benchmarks. It also makes some profiling using
+ performance counters more accurate. Please note that
+ on systems with MONITOR/MWAIT support (like Intel
+ EM64T CPUs) this option has no performance advantage
+ over the normal idle loop. It may also interact badly
+ with hyperthreading.
+
idle=halt: Halt is forced to be used for CPU idle.
In such case C2/C3 won't be used again.
+
idle=nomwait: Disable mwait for CPU C-states
idxd.sva= [HW]
@@ -1939,7 +2270,7 @@
for the device. By default it is set to false (0).
ieee754= [MIPS] Select IEEE Std 754 conformance mode
- Format: { strict | legacy | 2008 | relaxed }
+ Format: { strict | legacy | 2008 | relaxed | emulated }
Default: strict
Choose which programs will be accepted for execution
@@ -1959,6 +2290,8 @@
by the FPU
relaxed accept any binaries regardless of whether
supported by the FPU
+ emulated accept any binaries but enable FPU emulator
+ if binary mode is unsupported by the FPU.
The FPU emulator is always able to support both NaN
encodings, so if no FPU hardware is present or it has
@@ -1973,7 +2306,7 @@
mode generally follows that for the NaN encoding,
except where unsupported by hardware.
- ignore_loglevel [KNL]
+ ignore_loglevel [KNL,EARLY]
Ignore loglevel setting - this will print /all/
kernel messages to the console. Useful for debugging.
We also add it as printk module parameter, so users
@@ -2066,6 +2399,28 @@
different crypto accelerators. This option can be used
to achieve best performance for particular HW.
+ ima= [IMA] Enable or disable IMA
+ Format: { "off" | "on" }
+ Default: "on"
+ Note that disabling IMA is limited to kdump kernel.
+
+ indirect_target_selection= [X86,Intel] Mitigation control for Indirect
+ Target Selection(ITS) bug in Intel CPUs. Updated
+ microcode is also required for a fix in IBPB.
+
+ on: Enable mitigation (default).
+ off: Disable mitigation.
+ force: Force the ITS bug and deploy default
+ mitigation.
+ vmexit: Only deploy mitigation if CPU is affected by
+ guest/host isolation part of ITS.
+ stuff: Deploy RSB-fill mitigation when retpoline is
+ also deployed. Otherwise, deploy the default
+ mitigation.
+
+ For details see:
+ Documentation/admin-guide/hw-vuln/indirect-target-selection.rst
+
init= [KNL]
Format: <full_path>
Run specified binary instead of /sbin/init as init
@@ -2091,21 +2446,21 @@
unpacking being completed before device_ and
late_ initcalls.
- initrd= [BOOT] Specify the location of the initial ramdisk
+ initrd= [BOOT,EARLY] Specify the location of the initial ramdisk
- initrdmem= [KNL] Specify a physical address and size from which to
+ initrdmem= [KNL,EARLY] Specify a physical address and size from which to
load the initrd. If an initrd is compiled in or
specified in the bootparams, it takes priority over this
setting.
Format: ss[KMG],nn[KMG]
Default is 0, 0
- init_on_alloc= [MM] Fill newly allocated pages and heap objects with
+ init_on_alloc= [MM,EARLY] Fill newly allocated pages and heap objects with
zeroes.
Format: 0 | 1
Default set by CONFIG_INIT_ON_ALLOC_DEFAULT_ON.
- init_on_free= [MM] Fill freed pages and heap objects with zeroes.
+ init_on_free= [MM,EARLY] Fill freed pages and heap objects with zeroes.
Format: 0 | 1
Default set by CONFIG_INIT_ON_FREE_DEFAULT_ON.
@@ -2161,7 +2516,7 @@
0 disables intel_idle and fall back on acpi_idle.
1 to 9 specify maximum depth of C-state.
- intel_pstate= [X86]
+ intel_pstate= [X86,EARLY]
disable
Do not enable intel_pstate as the default
scaling driver for the supported processors
@@ -2204,35 +2559,93 @@
per_cpu_perf_limits
Allow per-logical-CPU P-State performance control limits using
cpufreq sysfs interface
+ no_cas
+ Do not enable capacity-aware scheduling (CAS) on
+ hybrid systems
- intremap= [X86-64, Intel-IOMMU]
+ intremap= [X86-64,Intel-IOMMU,EARLY]
on enable Interrupt Remapping (default)
off disable Interrupt Remapping
nosid disable Source ID checking
no_x2apic_optout
BIOS x2APIC opt-out request will be ignored
nopost disable Interrupt Posting
+ posted_msi
+ enable MSIs delivered as posted interrupts
iomem= Disable strict checking of access to MMIO memory
strict regions from userspace.
relaxed
- iommu= [X86]
+ iommu= [X86,EARLY]
+
off
+ Don't initialize and use any kind of IOMMU.
+
force
+ Force the use of the hardware IOMMU even when
+ it is not actually needed (e.g. because < 3 GB
+ memory).
+
noforce
+ Don't force hardware IOMMU usage when it is not
+ needed. (default).
+
biomerge
panic
nopanic
merge
nomerge
+
soft
- pt [X86]
- nopt [X86]
- nobypass [PPC/POWERNV]
+ Use software bounce buffering (SWIOTLB) (default for
+ Intel machines). This can be used to prevent the usage
+ of an available hardware IOMMU.
+
+ [X86]
+ pt
+ [X86]
+ nopt
+ [PPC/POWERNV]
+ nobypass
Disable IOMMU bypass, using IOMMU for PCI devices.
- iommu.forcedac= [ARM64, X86] Control IOVA allocation for PCI devices.
+ [X86]
+ AMD Gart HW IOMMU-specific options:
+
+ <size>
+ Set the size of the remapping area in bytes.
+
+ allowed
+ Overwrite iommu off workarounds for specific chipsets
+
+ fullflush
+ Flush IOMMU on each allocation (default).
+
+ nofullflush
+ Don't use IOMMU fullflush.
+
+ memaper[=<order>]
+ Allocate an own aperture over RAM with size
+ 32MB<<order. (default: order=1, i.e. 64MB)
+
+ merge
+ Do scatter-gather (SG) merging. Implies "force"
+ (experimental).
+
+ nomerge
+ Don't do scatter-gather (SG) merging.
+
+ noaperture
+ Ask the IOMMU not to touch the aperture for AGP.
+
+ noagp
+ Don't initialize the AGP driver and use full aperture.
+
+ panic
+ Always panic when IOMMU overflows.
+
+ iommu.forcedac= [ARM64,X86,EARLY] Control IOVA allocation for PCI devices.
Format: { "0" | "1" }
0 - Try to allocate a 32-bit DMA address first, before
falling back to the full range if needed.
@@ -2240,7 +2653,7 @@
forcing Dual Address Cycle for PCI cards supporting
greater than 32-bit addressing.
- iommu.strict= [ARM64, X86, S390] Configure TLB invalidation behaviour
+ iommu.strict= [ARM64,X86,S390,EARLY] Configure TLB invalidation behaviour
Format: { "0" | "1" }
0 - Lazy mode.
Request that DMA unmap operations use deferred
@@ -2256,7 +2669,7 @@
legacy driver-specific options takes precedence.
iommu.passthrough=
- [ARM64, X86] Configure DMA to bypass the IOMMU by default.
+ [ARM64,X86,EARLY] Configure DMA to bypass the IOMMU by default.
Format: { "0" | "1" }
0 - Use IOMMU translation for DMA.
1 - Bypass the IOMMU for DMA.
@@ -2266,7 +2679,7 @@
See comment before marvel_specify_io7 in
arch/alpha/kernel/core_marvel.c.
- io_delay= [X86] I/O delay method
+ io_delay= [X86,EARLY] I/O delay method
0x80
Standard port 0x80 based delay
0xed
@@ -2279,37 +2692,61 @@
ip= [IP_PNP]
See Documentation/admin-guide/nfs/nfsroot.rst.
- ipcmni_extend [KNL] Extend the maximum number of unique System V
+ ipcmni_extend [KNL,EARLY] Extend the maximum number of unique System V
IPC identifiers from 32,768 to 16,777,216.
+ ipe.enforce= [IPE]
+ Format: <bool>
+ Determine whether IPE starts in permissive (0) or
+ enforce (1) mode. The default is enforce.
+
+ ipe.success_audit=
+ [IPE]
+ Format: <bool>
+ Start IPE with success auditing enabled, emitting
+ an audit event when a binary is allowed. The default
+ is 0.
+
irqaffinity= [SMP] Set the default irq affinity mask
The argument is a cpu list, as described above.
irqchip.gicv2_force_probe=
- [ARM, ARM64]
+ [ARM,ARM64,EARLY]
Format: <bool>
Force the kernel to look for the second 4kB page
of a GICv2 controller even if the memory range
exposed by the device tree is too small.
irqchip.gicv3_nolpi=
- [ARM, ARM64]
+ [ARM,ARM64,EARLY]
Force the kernel to ignore the availability of
LPIs (and by consequence ITSs). Intended for system
that use the kernel as a bootloader, and thus want
to let secondary kernels in charge of setting up
LPIs.
- irqchip.gicv3_pseudo_nmi= [ARM64]
+ irqchip.gicv3_pseudo_nmi= [ARM64,EARLY]
Enables support for pseudo-NMIs in the kernel. This
requires the kernel to be built with
CONFIG_ARM64_PSEUDO_NMI.
+ irqchip.riscv_imsic_noipi
+ [RISC-V,EARLY]
+ Force the kernel to not use IMSIC software injected MSIs
+ as IPIs. Intended for system where IMSIC is trap-n-emulated,
+ and thus want to reduce MMIO traps when triggering IPIs
+ to multiple harts.
+
irqfixup [HW]
When an interrupt is not handled search all handlers
for it. Intended to get systems with badly broken
firmware running.
+ irqhandler.duration_warn_us= [KNL]
+ Warn if an IRQ handler exceeds the specified duration
+ threshold in microseconds. Useful for identifying
+ long-running IRQs in the system.
+
irqpoll [HW]
When an interrupt is not handled search all handlers
for it. Also check all handlers each timer
@@ -2327,7 +2764,9 @@
specified in the flag list (default: domain):
nohz
- Disable the tick when a single task runs.
+ Disable the tick when a single task runs as well as
+ disabling other kernel noises like having RCU callbacks
+ offloaded. This is equivalent to the nohz_full parameter.
A residual 1Hz tick is offloaded to workqueues, which you
need to affine to housekeeping through the global
@@ -2445,7 +2884,7 @@
parameter KASAN will print report only for the first
invalid access.
- keep_bootcon [KNL]
+ keep_bootcon [KNL,EARLY]
Do not unregister boot console at start. This is only
useful for debugging when something happens in the window
between unregistering the boot console and initializing
@@ -2453,7 +2892,7 @@
keepinitrd [HW,ARM] See retain_initrd.
- kernelcore= [KNL,X86,IA-64,PPC]
+ kernelcore= [KNL,X86,PPC,EARLY]
Format: nn[KMGTPE] | nn% | "mirror"
This parameter specifies the amount of memory usable by
the kernel for non-movable allocations. The requested
@@ -2478,7 +2917,7 @@
for Movable pages. "nn[KMGTPE]", "nn%", and "mirror"
are exclusive, so you cannot specify multiple forms.
- kgdbdbgp= [KGDB,HW] kgdb over EHCI usb debug port.
+ kgdbdbgp= [KGDB,HW,EARLY] kgdb over EHCI usb debug port.
Format: <Controller#>[,poll interval]
The controller # is the number of the ehci usb debug
port as it is probed via PCI. The poll interval is
@@ -2499,7 +2938,7 @@
kms, kbd format: kms,kbd
kms, kbd and serial format: kms,kbd,<ser_dev>[,baud]
- kgdboc_earlycon= [KGDB,HW]
+ kgdboc_earlycon= [KGDB,HW,EARLY]
If the boot console provides the ability to read
characters and can work in polling mode, you can use
this parameter to tell kgdb to use it as a backend
@@ -2514,14 +2953,39 @@
blank and the first boot console that implements
read() will be picked.
- kgdbwait [KGDB] Stop kernel execution and enter the
+ kgdbwait [KGDB,EARLY] Stop kernel execution and enter the
kernel debugger at the earliest opportunity.
+ kho= [KEXEC,EARLY]
+ Format: { "0" | "1" | "off" | "on" | "y" | "n" }
+ Enables or disables Kexec HandOver.
+ "0" | "off" | "n" - kexec handover is disabled
+ "1" | "on" | "y" - kexec handover is enabled
+
+ kho_scratch= [KEXEC,EARLY]
+ Format: ll[KMG],mm[KMG],nn[KMG] | nn%
+ Defines the size of the KHO scratch region. The KHO
+ scratch regions are physically contiguous memory
+ ranges that can only be used for non-kernel
+ allocations. That way, even when memory is heavily
+ fragmented with handed over memory, the kexeced
+ kernel will always have enough contiguous ranges to
+ bootstrap itself.
+
+ It is possible to specify the exact amount of
+ memory in the form of "ll[KMG],mm[KMG],nn[KMG]"
+ where the first parameter defines the size of a low
+ memory scratch area, the second parameter defines
+ the size of a global scratch area and the third
+ parameter defines the size of additional per-node
+ scratch areas. The form "nn%" defines scale factor
+ (in percents) of memory that was used during boot.
+
kmac= [MIPS] Korina ethernet MAC address.
Configure the RouterBoard 532 series on-chip
Ethernet adapter MAC address.
- kmemleak= [KNL] Boot-time kmemleak enable/disable
+ kmemleak= [KNL,EARLY] Boot-time kmemleak enable/disable
Valid arguments: on, off
Default: on
Built with CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF=y,
@@ -2540,8 +3004,8 @@
See also Documentation/trace/kprobetrace.rst "Kernel
Boot Parameter" section.
- kpti= [ARM64] Control page table isolation of user
- and kernel address spaces.
+ kpti= [ARM64,EARLY] Control page table isolation of
+ user and kernel address spaces.
Default: enabled on cores which need mitigation.
0: force disabled
1: force enabled
@@ -2580,6 +3044,23 @@
Default is Y (on).
+ kvm.enable_virt_at_load=[KVM,ARM64,LOONGARCH,MIPS,RISCV,X86]
+ If enabled, KVM will enable virtualization in hardware
+ when KVM is loaded, and disable virtualization when KVM
+ is unloaded (if KVM is built as a module).
+
+ If disabled, KVM will dynamically enable and disable
+ virtualization on-demand when creating and destroying
+ VMs, i.e. on the 0=>1 and 1=>0 transitions of the
+ number of VMs.
+
+ Enabling virtualization at module load avoids potential
+ latency for creation of the 0=>1 VM, as KVM serializes
+ virtualization enabling across all online CPUs. The
+ "cost" of enabling virtualization when KVM is loaded,
+ is that doing so may interfere with using out-of-tree
+ hypervisors that want to "own" virtualization hardware.
+
kvm.enable_vmware_backdoor=[KVM] Support VMware backdoor PV interface.
Default is false (don't support).
@@ -2617,43 +3098,87 @@
(enabled). Disable by KVM if hardware lacks support
for NPT.
+ kvm-amd.ciphertext_hiding_asids=
+ [KVM,AMD] Ciphertext hiding prevents disallowed accesses
+ to SNP private memory from reading ciphertext. Instead,
+ reads will see constant default values (0xff).
+
+ If ciphertext hiding is enabled, the joint SEV-ES and
+ SEV-SNP ASID space is partitioned into separate SEV-ES
+ and SEV-SNP ASID ranges, with the SEV-SNP range being
+ [1..max_snp_asid] and the SEV-ES range being
+ (max_snp_asid..min_sev_asid), where min_sev_asid is
+ enumerated by CPUID.0x.8000_001F[EDX].
+
+ A non-zero value enables SEV-SNP ciphertext hiding and
+ adjusts the ASID ranges for SEV-ES and SEV-SNP guests.
+ KVM caps the number of SEV-SNP ASIDs at the maximum
+ possible value, e.g. specifying -1u will assign all
+ joint SEV-ES and SEV-SNP ASIDs to SEV-SNP. Note,
+ assigning all joint ASIDs to SEV-SNP, i.e. configuring
+ max_snp_asid == min_sev_asid-1, will effectively make
+ SEV-ES unusable.
+
kvm-arm.mode=
- [KVM,ARM] Select one of KVM/arm64's modes of operation.
+ [KVM,ARM,EARLY] Select one of KVM/arm64's modes of
+ operation.
none: Forcefully disable KVM.
nvhe: Standard nVHE-based mode, without support for
protected guests.
- protected: nVHE-based mode with support for guests whose
- state is kept private from the host.
+ protected: Mode with support for guests whose state is
+ kept private from the host, using VHE or
+ nVHE depending on HW support.
nested: VHE-based mode with support for nested
- virtualization. Requires at least ARMv8.3
- hardware.
+ virtualization. Requires at least ARMv8.4
+ hardware (with FEAT_NV2).
Defaults to VHE/nVHE based on hardware support. Setting
mode to "protected" will disable kexec and hibernation
- for the host. "nested" is experimental and should be
- used with extreme caution.
+ for the host. To force nVHE on VHE hardware, add
+ "arm64_sw.hvhe=0 id_aa64mmfr1.vh=0" to the
+ command-line.
+ "nested" is experimental and should be used with
+ extreme caution.
kvm-arm.vgic_v3_group0_trap=
- [KVM,ARM] Trap guest accesses to GICv3 group-0
+ [KVM,ARM,EARLY] Trap guest accesses to GICv3 group-0
system registers
kvm-arm.vgic_v3_group1_trap=
- [KVM,ARM] Trap guest accesses to GICv3 group-1
+ [KVM,ARM,EARLY] Trap guest accesses to GICv3 group-1
system registers
kvm-arm.vgic_v3_common_trap=
- [KVM,ARM] Trap guest accesses to GICv3 common
+ [KVM,ARM,EARLY] Trap guest accesses to GICv3 common
system registers
kvm-arm.vgic_v4_enable=
- [KVM,ARM] Allow use of GICv4 for direct injection of
- LPIs.
+ [KVM,ARM,EARLY] Allow use of GICv4 for direct
+ injection of LPIs.
+
+ kvm-arm.wfe_trap_policy=
+ [KVM,ARM] Control when to set WFE instruction trap for
+ KVM VMs. Traps are allowed but not guaranteed by the
+ CPU architecture.
+
+ trap: set WFE instruction trap
+
+ notrap: clear WFE instruction trap
+
+ kvm-arm.wfi_trap_policy=
+ [KVM,ARM] Control when to set WFI instruction trap for
+ KVM VMs. Traps are allowed but not guaranteed by the
+ CPU architecture.
+
+ trap: set WFI instruction trap
+
+ notrap: clear WFI instruction trap
- kvm_cma_resv_ratio=n [PPC]
+ kvm_cma_resv_ratio=n [PPC,EARLY]
Reserves given percentage from system memory area for
contiguous memory allocation for KVM hash pagetable
allocation.
@@ -2706,7 +3231,7 @@
(enabled). Disable by KVM if hardware lacks support
for it.
- l1d_flush= [X86,INTEL]
+ l1d_flush= [X86,INTEL,EARLY]
Control mitigation for L1D based snooping vulnerability.
Certain CPUs are vulnerable to an exploit against CPU
@@ -2723,7 +3248,7 @@
on - enable the interface for the mitigation
- l1tf= [X86] Control mitigation of the L1TF vulnerability on
+ l1tf= [X86,EARLY] Control mitigation of the L1TF vulnerability on
affected CPUs
The kernel PTE inversion protection is unconditionally
@@ -2792,7 +3317,7 @@
l3cr= [PPC]
- lapic [X86-32,APIC] Enable the local APIC even if BIOS
+ lapic [X86-32,APIC,EARLY] Enable the local APIC even if BIOS
disabled it.
lapic= [X86,APIC] Do not use TSC deadline
@@ -2800,7 +3325,7 @@
back to the programmable timer unit in the LAPIC.
Format: notscdeadline
- lapic_timer_c2_ok [X86,APIC] trust the local apic timer
+ lapic_timer_c2_ok [X86,APIC,EARLY] trust the local apic timer
in C2 power state.
libata.dma= [LIBATA] DMA control
@@ -2895,6 +3420,8 @@
* max_sec_lba48: Set or clear transfer size limit to
65535 sectors.
+ * external: Mark port as external (hotplug-capable).
+
* [no]lpm: Enable or disable link power management.
* [no]setxfer: Indicate if transfer speed mode setting
@@ -2924,7 +3451,7 @@
lockd.nlm_udpport=M [NFS] Assign UDP port.
Format: <integer>
- lockdown= [SECURITY]
+ lockdown= [SECURITY,EARLY]
{ integrity | confidentiality }
Enable the kernel lockdown feature. If set to
integrity, kernel features that allow userland to
@@ -3031,7 +3558,8 @@
logibm.irq= [HW,MOUSE] Logitech Bus Mouse Driver
Format: <irq>
- loglevel= All Kernel Messages with a loglevel smaller than the
+ loglevel= [KNL,EARLY]
+ All Kernel Messages with a loglevel smaller than the
console loglevel will be printed to the console. It can
also be changed with klogd or other programs. The
loglevels are defined as follows:
@@ -3045,13 +3573,15 @@
6 (KERN_INFO) informational
7 (KERN_DEBUG) debug-level messages
- log_buf_len=n[KMG] Sets the size of the printk ring buffer,
- in bytes. n must be a power of two and greater
- than the minimal size. The minimal size is defined
- by LOG_BUF_SHIFT kernel config parameter. There is
- also CONFIG_LOG_CPU_MAX_BUF_SHIFT config parameter
- that allows to increase the default size depending on
- the number of CPUs. See init/Kconfig for more details.
+ log_buf_len=n[KMG] [KNL,EARLY]
+ Sets the size of the printk ring buffer, in bytes.
+ n must be a power of two and greater than the
+ minimal size. The minimal size is defined by
+ LOG_BUF_SHIFT kernel config parameter. There
+ is also CONFIG_LOG_CPU_MAX_BUF_SHIFT config
+ parameter that allows to increase the default size
+ depending on the number of CPUs. See init/Kconfig
+ for more details.
logo.nologo [FB] Disables display of the built-in Linux logo.
This may be used to provide more screen space for
@@ -3089,27 +3619,17 @@
unlikely, in the extreme case this might damage your
hardware.
- ltpc= [NET]
- Format: <io>,<irq>,<dma>
-
lsm.debug [SECURITY] Enable LSM initialization debugging output.
lsm=lsm1,...,lsmN
[SECURITY] Choose order of LSM initialization. This
overrides CONFIG_LSM, and the "security=" parameter.
- machvec= [IA-64] Force the use of a particular machine-vector
- (machvec) in a generic kernel.
- Example: machvec=hpzx1
-
machtype= [Loongson] Share the same kernel image file between
different yeeloong laptops.
Example: machtype=lemote-yeeloong-2f-7inch
- max_addr=nn[KMG] [KNL,BOOT,IA-64] All physical memory greater
- than or equal to this physical address is ignored.
-
- maxcpus= [SMP] Maximum number of processors that an SMP kernel
+ maxcpus= [SMP,EARLY] Maximum number of processors that an SMP kernel
will bring up during bootup. maxcpus=n : n >= 0 limits
the kernel to bring up 'n' processors. Surely after
bootup you can bring up the other plugged cpu by executing
@@ -3125,9 +3645,77 @@
devices can be requested on-demand with the
/dev/loop-control interface.
- mce [X86-32] Machine Check Exception
+ mce= [X86-{32,64}]
+
+ Please see Documentation/arch/x86/x86_64/machinecheck.rst for sysfs runtime tunables.
+
+ off
+ disable machine check
+
+ no_cmci
+ disable CMCI(Corrected Machine Check Interrupt) that
+ Intel processor supports. Usually this disablement is
+ not recommended, but it might be handy if your
+ hardware is misbehaving.
+
+ Note that you'll get more problems without CMCI than
+ with due to the shared banks, i.e. you might get
+ duplicated error logs.
+
+ dont_log_ce
+ don't make logs for corrected errors. All events
+ reported as corrected are silently cleared by OS. This
+ option will be useful if you have no interest in any
+ of corrected errors.
+
+ ignore_ce
+ disable features for corrected errors, e.g.
+ polling timer and CMCI. All events reported as
+ corrected are not cleared by OS and remained in its
+ error banks.
+
+ Usually this disablement is not recommended, however
+ if there is an agent checking/clearing corrected
+ errors (e.g. BIOS or hardware monitoring
+ applications), conflicting with OS's error handling,
+ and you cannot deactivate the agent, then this option
+ will be a help.
+
+ no_lmce
+ do not opt-in to Local MCE delivery. Use legacy method
+ to broadcast MCEs.
+
+ bootlog
+ enable logging of machine checks left over from
+ booting. Disabled by default on AMD Fam10h and older
+ because some BIOS leave bogus ones.
+
+ If your BIOS doesn't do that it's a good idea to
+ enable though to make sure you log even machine check
+ events that result in a reboot. On Intel systems it is
+ enabled by default.
+
+ nobootlog
+ disable boot machine check logging.
+
+ monarchtimeout (number)
+ sets the time in us to wait for other CPUs on machine
+ checks. 0 to disable.
+
+ bios_cmci_threshold
+ don't overwrite the bios-set CMCI threshold. This boot
+ option prevents Linux from overwriting the CMCI
+ threshold set by the bios. Without this option, Linux
+ always sets the CMCI threshold to 1. Enabling this may
+ make memory predictive failure analysis less effective
+ if the bios sets thresholds for memory errors since we
+ will not see details for all errors.
+
+ recovery
+ force-enable recoverable machine check code paths
+
+ Everything else is in sysfs now.
- mce=option [X86-64] See Documentation/arch/x86/x86_64/boot-options.rst
md= [HW] RAID subsystems devices and level
See Documentation/admin-guide/md.rst.
@@ -3136,7 +3724,7 @@
Format: <first>,<last>
Specifies range of consoles to be captured by the MDA.
- mds= [X86,INTEL]
+ mds= [X86,INTEL,EARLY]
Control mitigation for the Micro-architectural Data
Sampling (MDS) vulnerability.
@@ -3168,11 +3756,12 @@
For details see: Documentation/admin-guide/hw-vuln/mds.rst
- mem=nn[KMG] [HEXAGON] Set the memory size.
+ mem=nn[KMG] [HEXAGON,EARLY] Set the memory size.
Must be specified, otherwise memory size will be 0.
- mem=nn[KMG] [KNL,BOOT] Force usage of a specific amount of memory
- Amount of memory to be used in cases as follows:
+ mem=nn[KMG] [KNL,BOOT,EARLY] Force usage of a specific amount
+ of memory Amount of memory to be used in cases
+ as follows:
1 for test;
2 when the kernel is not able to see the whole system memory;
@@ -3196,8 +3785,8 @@
if system memory of hypervisor is not sufficient.
mem=nn[KMG]@ss[KMG]
- [ARM,MIPS] - override the memory layout reported by
- firmware.
+ [ARM,MIPS,EARLY] - override the memory layout
+ reported by firmware.
Define a memory region of size nn[KMG] starting at
ss[KMG].
Multiple different regions can be specified with
@@ -3206,7 +3795,7 @@
mem=nopentium [BUGS=X86-32] Disable usage of 4MB pages for kernel
memory.
- memblock=debug [KNL] Enable memblock debug messages.
+ memblock=debug [KNL,EARLY] Enable memblock debug messages.
memchunk=nn[KMG]
[KNL,SH] Allow user to override the default size for
@@ -3216,18 +3805,18 @@
[KNL] Set the initial state for the memory hotplug
onlining policy. If not specified, the default value is
set according to the
- CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE kernel config
- option.
+ CONFIG_MHP_DEFAULT_ONLINE_TYPE kernel config
+ options.
See Documentation/admin-guide/mm/memory-hotplug.rst.
- memmap=exactmap [KNL,X86] Enable setting of an exact
+ memmap=exactmap [KNL,X86,EARLY] Enable setting of an exact
E820 memory map, as specified by the user.
Such memmap=exactmap lines can be constructed based on
BIOS output or other requirements. See the memmap=nn@ss
option description.
memmap=nn[KMG]@ss[KMG]
- [KNL, X86, MIPS, XTENSA] Force usage of a specific region of memory.
+ [KNL, X86,MIPS,XTENSA,EARLY] Force usage of a specific region of memory.
Region of memory to be used is from ss to ss+nn.
If @ss[KMG] is omitted, it is equivalent to mem=nn[KMG],
which limits max address to nn[KMG].
@@ -3237,11 +3826,11 @@
memmap=100M@2G,100M#3G,1G!1024G
memmap=nn[KMG]#ss[KMG]
- [KNL,ACPI] Mark specific memory as ACPI data.
+ [KNL,ACPI,EARLY] Mark specific memory as ACPI data.
Region of memory to be marked is from ss to ss+nn.
memmap=nn[KMG]$ss[KMG]
- [KNL,ACPI] Mark specific memory as reserved.
+ [KNL,ACPI,EARLY] Mark specific memory as reserved.
Region of memory to be reserved is from ss to ss+nn.
Example: Exclude memory from 0x18690000-0x1869ffff
memmap=64K$0x18690000
@@ -3251,14 +3840,14 @@
like Grub2, otherwise '$' and the following number
will be eaten.
- memmap=nn[KMG]!ss[KMG]
+ memmap=nn[KMG]!ss[KMG,EARLY]
[KNL,X86] Mark specific memory as protected.
Region of memory to be used, from ss to ss+nn.
The memory region may be marked as e820 type 12 (0xc)
and is NVDIMM or ADR memory.
memmap=<size>%<offset>-<oldtype>+<newtype>
- [KNL,ACPI] Convert memory within the specified region
+ [KNL,ACPI,EARLY] Convert memory within the specified region
from <oldtype> to <newtype>. If "-<oldtype>" is left
out, the whole region will be marked as <newtype>,
even if previously unavailable. If "+<newtype>" is left
@@ -3266,25 +3855,25 @@
specified as e820 types, e.g., 1 = RAM, 2 = reserved,
3 = ACPI, 12 = PRAM.
- memory_corruption_check=0/1 [X86]
+ memory_corruption_check=0/1 [X86,EARLY]
Some BIOSes seem to corrupt the first 64k of
memory when doing things like suspend/resume.
Setting this option will scan the memory
looking for corruption. Enabling this will
both detect corruption and prevent the kernel
from using the memory being corrupted.
- However, its intended as a diagnostic tool; if
+ However, it's intended as a diagnostic tool; if
repeatable BIOS-originated corruption always
affects the same memory, you can use memmap=
to prevent the kernel from using that memory.
- memory_corruption_check_size=size [X86]
+ memory_corruption_check_size=size [X86,EARLY]
By default it checks for corruption in the low
64k, making this memory unavailable for normal
use. Use this parameter to scan for
corruption in more or less memory.
- memory_corruption_check_period=seconds [X86]
+ memory_corruption_check_period=seconds [X86,EARLY]
By default it checks for corruption every 60
seconds. Use this parameter to check at some
other rate. 0 disables periodic checking.
@@ -3308,7 +3897,7 @@
Note that even when enabled, there are a few cases where
the feature is not effective.
- memtest= [KNL,X86,ARM,M68K,PPC,RISCV] Enable memtest
+ memtest= [KNL,X86,ARM,M68K,PPC,RISCV,EARLY] Enable memtest
Format: <integer>
default : 0 <disable>
Specifies the number of memtest passes to be
@@ -3320,9 +3909,7 @@
mem_encrypt= [X86-64] AMD Secure Memory Encryption (SME) control
Valid arguments: on, off
- Default (depends on kernel configuration option):
- on (CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT=y)
- off (CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT=n)
+ Default: off
mem_encrypt=on: Activate SME
mem_encrypt=off: Do not activate SME
@@ -3335,10 +3922,6 @@
deep - Suspend-To-RAM or equivalent (if supported)
See Documentation/admin-guide/pm/sleep-states.rst.
- mfgpt_irq= [IA-32] Specify the IRQ to use for the
- Multi-Function General Purpose Timers on AMD Geode
- platforms.
-
mfgptfix [X86-32] Fix MFGPT timers on AMD Geode platforms when
the BIOS has incorrectly applied a workaround. TinyBIOS
version 0.98 is known to be affected, 0.99 fixes the
@@ -3346,14 +3929,19 @@
mga= [HW,DRM]
- microcode.force_minrev= [X86]
- Format: <bool>
+ microcode= [X86] Control the behavior of the microcode loader.
+ Available options, comma separated:
+
+ base_rev=X - with <X> with format: <u32>
+ Set the base microcode revision of each thread when in
+ debug mode.
+
+ dis_ucode_ldr: disable the microcode loader
+
+ force_minrev:
Enable or disable the microcode minimal revision
enforcement for the runtime microcode loader.
- min_addr=nn[KMG] [KNL,BOOT,IA-64] All physical memory below this
- physical address is ignored.
-
mini2440= [ARM,HW,KNL]
Format:[0..2][b][c][t]
Default: "0tb"
@@ -3376,17 +3964,21 @@
https://repo.or.cz/w/linux-2.6/mini2440.git
mitigations=
- [X86,PPC,S390,ARM64] Control optional mitigations for
+ [X86,PPC,S390,ARM64,EARLY] Control optional mitigations for
CPU vulnerabilities. This is a set of curated,
arch-independent options, each of which is an
aggregation of existing arch-specific options.
+ Note, "mitigations" is supported if and only if the
+ kernel was built with CPU_MITIGATIONS=y.
+
off
Disable all optional CPU mitigations. This
improves system performance, but it may also
expose users to several CPU vulnerabilities.
Equivalent to: if nokaslr then kpti=0 [ARM64]
gather_data_sampling=off [X86]
+ indirect_target_selection=off [X86]
kvm.nx_huge_pages=off [X86]
l1tf=off [X86]
mds=off [X86]
@@ -3398,12 +3990,16 @@
nospectre_bhb [ARM64]
nospectre_v1 [X86,PPC]
nospectre_v2 [X86,PPC,S390,ARM64]
+ reg_file_data_sampling=off [X86]
retbleed=off [X86]
+ spec_rstack_overflow=off [X86]
spec_store_bypass_disable=off [X86,PPC]
+ spectre_bhi=off [X86]
spectre_v2_user=off [X86]
srbds=off [X86,INTEL]
ssbd=force-off [ARM64]
tsx_async_abort=off [X86]
+ vmscape=off [X86]
Exceptions:
This does not have any effect on
@@ -3428,8 +4024,12 @@
mmio_stale_data=full,nosmt [X86]
retbleed=auto,nosmt [X86]
+ [X86] After one of the above options, additionally
+ supports attack-vector based controls as documented in
+ Documentation/admin-guide/hw-vuln/attack_vector_controls.rst
+
mminit_loglevel=
- [KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this
+ [KNL,EARLY] When CONFIG_DEBUG_MEMORY_INIT is set, this
parameter allows control of the logging verbosity for
the additional memory initialisation checks. A value
of 0 disables mminit logging and a level of 4 will
@@ -3437,7 +4037,7 @@
so loglevel=8 may also need to be specified.
mmio_stale_data=
- [X86,INTEL] Control mitigation for the Processor
+ [X86,INTEL,EARLY] Control mitigation for the Processor
MMIO Stale Data vulnerabilities.
Processor MMIO Stale Data is a class of
@@ -3512,7 +4112,7 @@
mousedev.yres= [MOUSE] Vertical screen resolution, used for devices
reporting absolute coordinates, such as tablets
- movablecore= [KNL,X86,IA-64,PPC]
+ movablecore= [KNL,X86,PPC,EARLY]
Format: nn[KMGTPE] | nn%
This parameter is the complement to kernelcore=, it
specifies the amount of memory used for migratable
@@ -3523,7 +4123,7 @@
that the amount of memory usable for all allocations
is not too small.
- movable_node [KNL] Boot-time switch to make hotplugable memory
+ movable_node [KNL,EARLY] Boot-time switch to make hotplugable memory
NUMA nodes to be movable. This means that the memory
of such nodes will be usable only for movable
allocations which rules out almost all kernel
@@ -3538,30 +4138,25 @@
mtdparts= [MTD]
See drivers/mtd/parsers/cmdlinepart.c
- mtdset= [ARM]
- ARM/S3C2412 JIVE boot control
-
- See arch/arm/mach-s3c/mach-jive.c
-
mtouchusb.raw_coordinates=
[HW] Make the MicroTouch USB driver use raw coordinates
('y', default) or cooked coordinates ('n')
- mtrr=debug [X86]
+ mtrr=debug [X86,EARLY]
Enable printing debug information related to MTRR
registers at boot time.
- mtrr_chunk_size=nn[KMG] [X86]
+ mtrr_chunk_size=nn[KMG,X86,EARLY]
used for mtrr cleanup. It is largest continuous chunk
that could hold holes aka. UC entries.
- mtrr_gran_size=nn[KMG] [X86]
+ mtrr_gran_size=nn[KMG,X86,EARLY]
Used for mtrr cleanup. It is granularity of mtrr block.
Default is 1.
Large value could prevent small alignment from
using up MTRRs.
- mtrr_spare_reg_nr=n [X86]
+ mtrr_spare_reg_nr=n [X86,EARLY]
Format: <integer>
Range: 0,7 : spare reg number
Default : 1
@@ -3728,10 +4323,12 @@
Format: [state][,regs][,debounce][,die]
nmi_watchdog= [KNL,BUGS=X86] Debugging features for SMP kernels
- Format: [panic,][nopanic,][num]
+ Format: [panic,][nopanic,][rNNN,][num]
Valid num: 0 or 1
0 - turn hardlockup detector in nmi_watchdog off
1 - turn hardlockup detector in nmi_watchdog on
+ rNNN - configure the watchdog with raw perf event 0xNNN
+
When panic is specified, panic when an NMI watchdog
timeout occurs (or 'nopanic' to not panic on an NMI
watchdog, if CONFIG_BOOTPARAM_HARDLOCKUP_PANIC is set)
@@ -3747,27 +4344,22 @@
emulation library even if a 387 maths coprocessor
is present.
- no4lvl [RISCV] Disable 4-level and 5-level paging modes. Forces
- kernel to use 3-level paging instead.
+ no4lvl [RISCV,EARLY] Disable 4-level and 5-level paging modes.
+ Forces kernel to use 3-level paging instead.
- no5lvl [X86-64,RISCV] Disable 5-level paging mode. Forces
+ no5lvl [X86-64,RISCV,EARLY] Disable 5-level paging mode. Forces
kernel to use 4-level paging instead.
- noaliencache [MM, NUMA, SLAB] Disables the allocation of alien
- caches in the slab allocator. Saves per-node memory,
- but will impact performance.
-
noalign [KNL,ARM]
- noaltinstr [S390] Disables alternative instructions patching
- (CPU alternatives feature).
-
- noapic [SMP,APIC] Tells the kernel to not make use of any
+ noapic [SMP,APIC,EARLY] Tells the kernel to not make use of any
IOAPICs that may be present in the system.
+ noapictimer [APIC,X86] Don't set up the APIC timer
+
noautogroup Disable scheduler automatic task group creation.
- nocache [ARM]
+ nocache [ARM,EARLY]
no_console_suspend
[HW] Never suspend the console
@@ -3785,15 +4377,13 @@
turn on/off it dynamically.
no_debug_objects
- [KNL] Disable object debugging
+ [KNL,EARLY] Disable object debugging
nodsp [SH] Disable hardware DSP at boot time.
- noefi Disable EFI runtime services support.
+ noefi [EFI,EARLY] Disable EFI runtime services support.
- no_entry_flush [PPC] Don't flush the L1-D cache when entering the kernel.
-
- noexec [IA-64]
+ no_entry_flush [PPC,EARLY] Don't flush the L1-D cache when entering the kernel.
noexec32 [X86-64]
This affects only 32-bit executables.
@@ -3814,30 +4404,15 @@
register save and restore. The kernel will only save
legacy floating-point registers on task switch.
- nohalt [IA-64] Tells the kernel not to use the power saving
- function PAL_HALT_LIGHT when idle. This increases
- power-consumption. On the positive side, it reduces
- interrupt wake-up latency, which may improve performance
- in certain environments such as networked servers or
- real-time systems.
+ nogbpages [X86] Do not use GB pages for kernel direct mappings.
no_hash_pointers
- Force pointers printed to the console or buffers to be
- unhashed. By default, when a pointer is printed via %p
- format string, that pointer is "hashed", i.e. obscured
- by hashing the pointer value. This is a security feature
- that hides actual kernel addresses from unprivileged
- users, but it also makes debugging the kernel more
- difficult since unequal pointers can no longer be
- compared. However, if this command-line option is
- specified, then all normal pointers will have their true
- value printed. This option should only be specified when
- debugging the kernel. Please do not use on production
- kernels.
+ [KNL,EARLY]
+ Alias for "hash_pointers=never".
nohibernate [HIBERNATION] Disable hibernation and resume.
- nohlt [ARM,ARM64,MICROBLAZE,MIPS,PPC,SH] Forces the kernel to
+ nohlt [ARM,ARM64,MICROBLAZE,MIPS,PPC,RISCV,SH] Forces the kernel to
busy wait in do_idle() and not use the arch_cpu_idle()
implementation; requires CONFIG_GENERIC_IDLE_POLL_SETUP
to be effective. This is useful on platforms where the
@@ -3846,9 +4421,11 @@
the impact of the sleep instructions. This is also
useful when using JTAG debugger.
- nohugeiomap [KNL,X86,PPC,ARM64] Disable kernel huge I/O mappings.
+ nohpet [X86] Don't use the HPET timer.
+
+ nohugeiomap [KNL,X86,PPC,ARM64,EARLY] Disable kernel huge I/O mappings.
- nohugevmalloc [KNL,X86,PPC,ARM64] Disable kernel huge vmalloc mappings.
+ nohugevmalloc [KNL,X86,PPC,ARM64,EARLY] Disable kernel huge vmalloc mappings.
nohz= [KNL] Boottime enable/disable dynamic ticks
Valid arguments: on, off
@@ -3870,13 +4447,11 @@
noinitrd [RAM] Tells the kernel not to load any configured
initial RAM disk.
- nointremap [X86-64, Intel-IOMMU] Do not enable interrupt
+ nointremap [X86-64,Intel-IOMMU,EARLY] Do not enable interrupt
remapping.
[Deprecated - use intremap=off]
- nointroute [IA-64]
-
- noinvpcid [X86] Disable the INVPCID cpu feature.
+ noinvpcid [X86,EARLY] Disable the INVPCID cpu feature.
noiotrap [SH] Disables trapped I/O port accesses.
@@ -3885,23 +4460,19 @@
noisapnp [ISAPNP] Disables ISA PnP code.
- nojitter [IA-64] Disables jitter checking for ITC timers.
-
- nokaslr [KNL]
+ nokaslr [KNL,EARLY]
When CONFIG_RANDOMIZE_BASE is set, this disables
kernel and module base offset ASLR (Address Space
Layout Randomization).
- no-kvmapf [X86,KVM] Disable paravirtualized asynchronous page
+ no-kvmapf [X86,KVM,EARLY] Disable paravirtualized asynchronous page
fault handling.
- no-kvmclock [X86,KVM] Disable paravirtualized KVM clock driver
-
- nolapic [X86-32,APIC] Do not enable or use the local APIC.
+ no-kvmclock [X86,KVM,EARLY] Disable paravirtualized KVM clock driver
- nolapic_timer [X86-32,APIC] Do not use the local APIC timer.
+ nolapic [X86-32,APIC,EARLY] Do not enable or use the local APIC.
- nomca [IA-64] Disable machine check abort handling
+ nolapic_timer [X86-32,APIC,EARLY] Do not use the local APIC timer.
nomce [X86-32] Disable Machine Check Exception
@@ -3924,23 +4495,23 @@
shutdown the other cpus. Instead use the REBOOT_VECTOR
irq.
- nopat [X86] Disable PAT (page attribute table extension of
+ nopat [X86,EARLY] Disable PAT (page attribute table extension of
pagetables) support.
- nopcid [X86-64] Disable the PCID cpu feature.
+ nopcid [X86-64,EARLY] Disable the PCID cpu feature.
nopku [X86] Disable Memory Protection Keys CPU feature found
in some Intel CPUs.
- nopti [X86-64]
+ nopti [X86-64,EARLY]
Equivalent to pti=off
- nopv= [X86,XEN,KVM,HYPER_V,VMWARE]
+ nopv= [X86,XEN,KVM,HYPER_V,VMWARE,EARLY]
Disables the PV optimizations forcing the guest to run
as generic guest with no PV drivers. Currently support
XEN HVM, KVM, HYPER_V and VMWARE guest.
- nopvspin [X86,XEN,KVM]
+ nopvspin [X86,XEN,KVM,EARLY]
Disables the qspinlock slow path using PV optimizations
which allow the hypervisor to 'idle' the guest on lock
contention.
@@ -3954,61 +4525,62 @@
noresume [SWSUSP] Disables resume and restores original swap
space.
- nosbagart [IA-64]
-
no-scroll [VGA] Disables scrollback.
This is required for the Braillex ib80-piezo Braille
reader made by F.H. Papenmeier (Germany).
- nosgx [X86-64,SGX] Disables Intel SGX kernel support.
+ nosgx [X86-64,SGX,EARLY] Disables Intel SGX kernel support.
- nosmap [PPC]
+ nosmap [PPC,EARLY]
Disable SMAP (Supervisor Mode Access Prevention)
even if it is supported by processor.
- nosmep [PPC64s]
+ nosmep [PPC64s,EARLY]
Disable SMEP (Supervisor Mode Execution Prevention)
even if it is supported by processor.
- nosmp [SMP] Tells an SMP kernel to act as a UP kernel,
+ nosmp [SMP,EARLY] Tells an SMP kernel to act as a UP kernel,
and disable the IO APIC. legacy for "maxcpus=0".
- nosmt [KNL,MIPS,PPC,S390] Disable symmetric multithreading (SMT).
+ nosmt [KNL,MIPS,PPC,EARLY] Disable symmetric multithreading (SMT).
Equivalent to smt=1.
- [KNL,X86,PPC] Disable symmetric multithreading (SMT).
+ [KNL,X86,PPC,S390] Disable symmetric multithreading (SMT).
nosmt=force: Force disable SMT, cannot be undone
via the sysfs control file.
nosoftlockup [KNL] Disable the soft-lockup detector.
nospec_store_bypass_disable
- [HW] Disable all mitigations for the Speculative Store Bypass vulnerability
+ [HW,EARLY] Disable all mitigations for the Speculative
+ Store Bypass vulnerability
- nospectre_bhb [ARM64] Disable all mitigations for Spectre-BHB (branch
+ nospectre_bhb [ARM64,EARLY] Disable all mitigations for Spectre-BHB (branch
history injection) vulnerability. System may allow data leaks
with this option.
- nospectre_v1 [X86,PPC] Disable mitigations for Spectre Variant 1
+ nospectre_v1 [X86,PPC,EARLY] Disable mitigations for Spectre Variant 1
(bounds check bypass). With this option data leaks are
possible in the system.
- nospectre_v2 [X86,PPC_E500,ARM64] Disable all mitigations for
- the Spectre variant 2 (indirect branch prediction)
- vulnerability. System may allow data leaks with this
- option.
+ nospectre_v2 [X86,PPC_E500,ARM64,EARLY] Disable all mitigations
+ for the Spectre variant 2 (indirect branch
+ prediction) vulnerability. System may allow data
+ leaks with this option.
- no-steal-acc [X86,PV_OPS,ARM64,PPC/PSERIES,RISCV] Disable
- paravirtualized steal time accounting. steal time is
- computed, but won't influence scheduler behaviour
+ no-steal-acc [X86,PV_OPS,ARM64,PPC/PSERIES,RISCV,LOONGARCH,EARLY]
+ Disable paravirtualized steal time accounting. steal time
+ is computed, but won't influence scheduler behaviour
nosync [HW,M68K] Disables sync negotiation for all devices.
- no_timer_check [X86,APIC] Disables the code which tests for
- broken timer IRQ sources.
+ no_timer_check [X86,APIC] Disables the code which tests for broken
+ timer IRQ sources, i.e., the IO-APIC timer. This can
+ work around problems with incorrect timer
+ initialization on some boards.
no_uaccess_flush
- [PPC] Don't flush the L1-D cache after accessing user data.
+ [PPC,EARLY] Don't flush the L1-D cache after accessing user data.
novmcoredd [KNL,KDUMP]
Disable device dump. Device dump allows drivers to
@@ -4022,15 +4594,15 @@
is set.
no-vmw-sched-clock
- [X86,PV_OPS] Disable paravirtualized VMware scheduler
- clock and use the default one.
+ [X86,PV_OPS,EARLY] Disable paravirtualized VMware
+ scheduler clock and use the default one.
nowatchdog [KNL] Disable both lockup detectors, i.e.
soft-lockup and NMI watchdog (hard-lockup).
- nowb [ARM]
+ nowb [ARM,EARLY]
- nox2apic [X86-64,APIC] Do not enable x2APIC mode.
+ nox2apic [X86-64,APIC,EARLY] Do not enable x2APIC mode.
NOTE: this parameter will be ignored on systems with the
LEGACY_XAPIC_DISABLED bit set in the
@@ -4055,20 +4627,7 @@
parameter, xsave area per process might occupy more
memory on xsaves enabled systems.
- nps_mtm_hs_ctr= [KNL,ARC]
- This parameter sets the maximum duration, in
- cycles, each HW thread of the CTOP can run
- without interruptions, before HW switches it.
- The actual maximum duration is 16 times this
- parameter's value.
- Format: integer between 1 and 255
- Default: 255
-
- nptcg= [IA-64] Override max number of concurrent global TLB
- purges which is reported from either PAL_VM_SUMMARY or
- SAL PALO.
-
- nr_cpus= [SMP] Maximum number of processors that an SMP kernel
+ nr_cpus= [SMP,EARLY] Maximum number of processors that an SMP kernel
could support. nr_cpus=n : n >= 1 limits the kernel to
support 'n' processors. It could be larger than the
number of already plugged CPU during bootup, later in
@@ -4079,8 +4638,29 @@
nr_uarts= [SERIAL] maximum number of UARTs to be registered.
- numa=off [KNL, ARM64, PPC, RISCV, SPARC, X86] Disable NUMA, Only
- set up a single NUMA node spanning all memory.
+ numa=off [KNL, ARM64, PPC, RISCV, SPARC, X86, EARLY]
+ Disable NUMA, Only set up a single NUMA node
+ spanning all memory.
+
+ numa=fake=<size>[MG]
+ [KNL, ARM64, RISCV, X86, EARLY]
+ If given as a memory unit, fills all system RAM with
+ nodes of size interleaved over physical nodes.
+
+ numa=fake=<N>
+ [KNL, ARM64, RISCV, X86, EARLY]
+ If given as an integer, fills all system RAM with N
+ fake nodes interleaved over physical nodes.
+
+ numa=fake=<N>U
+ [KNL, ARM64, RISCV, X86, EARLY]
+ If given as an integer followed by 'U', it will
+ divide each physical node into N emulated nodes.
+
+ numa=noacpi [X86] Don't parse the SRAT table for NUMA setup
+
+ numa=nohmat [X86] Don't parse the HMAT table for NUMA setup, or
+ soft-reserved memory partitioning.
numa_balancing= [KNL,ARM64,PPC,RISCV,S390,X86] Enable or disable automatic
NUMA balancing.
@@ -4091,7 +4671,7 @@
This can be set from sysctl after boot.
See Documentation/admin-guide/sysctl/vm.rst for details.
- ohci1394_dma=early [HW] enable debugging via the ohci1394 driver.
+ ohci1394_dma=early [HW,EARLY] enable debugging via the ohci1394 driver.
See Documentation/core-api/debugging-via-ohci1394.rst for more
info.
@@ -4117,7 +4697,8 @@
Once locked, the boundary cannot be changed.
1 indicates lock status, 0 indicates unlock status.
- oops=panic Always panic on oopses. Default is to just kill the
+ oops=panic [KNL,EARLY]
+ Always panic on oopses. Default is to just kill the
process, but there is a small probability of
deadlocking the machine.
This will also cause panics on machine check exceptions.
@@ -4125,21 +4706,19 @@
page_alloc.shuffle=
[KNL] Boolean flag to control whether the page allocator
- should randomize its free lists. The randomization may
- be automatically enabled if the kernel detects it is
- running on a platform with a direct-mapped memory-side
- cache, and this parameter can be used to
- override/disable that behavior. The state of the flag
- can be read from sysfs at:
+ should randomize its free lists. This parameter can be
+ used to enable/disable page randomization. The state of
+ the flag can be read from sysfs at:
/sys/module/page_alloc/parameters/shuffle.
+ This parameter is only available if CONFIG_SHUFFLE_PAGE_ALLOCATOR=y.
- page_owner= [KNL] Boot-time page_owner enabling option.
+ page_owner= [KNL,EARLY] Boot-time page_owner enabling option.
Storage of the information about who allocated
each page is disabled in default. With this switch,
we can turn it on.
on: enable the feature
- page_poison= [KNL] Boot-time parameter changing the state of
+ page_poison= [KNL,EARLY] Boot-time parameter changing the state of
poisoning on the buddy allocator, available with
CONFIG_PAGE_POISONING=y.
off: turn off poisoning (default)
@@ -4157,7 +4736,8 @@
timeout < 0: reboot immediately
Format: <timeout>
- panic_on_taint= Bitmask for conditionally calling panic() in add_taint()
+ panic_on_taint= [KNL,EARLY]
+ Bitmask for conditionally calling panic() in add_taint()
Format: <hex>[,nousertaint]
Hexadecimal bitmask representing the set of TAINT flags
that will cause the kernel to panic when add_taint() is
@@ -4180,13 +4760,33 @@
bit 2: print timer info
bit 3: print locks info if CONFIG_LOCKDEP is on
bit 4: print ftrace buffer
- bit 5: print all printk messages in buffer
+ bit 5: replay all kernel messages on consoles at the end of panic
bit 6: print all CPUs backtrace (if available in the arch)
+ bit 7: print only tasks in uninterruptible (blocked) state
*Be aware* that this option may print a _lot_ of lines,
so there are risks of losing older messages in the log.
Use this option carefully, maybe worth to setup a
bigger log buffer with "log_buf_len" along with this.
+ panic_sys_info= A comma separated list of extra information to be dumped
+ on panic.
+ Format: val[,val...]
+ Where @val can be any of the following:
+
+ tasks: print all tasks info
+ mem: print system memory info
+ timers: print timers info
+ locks: print locks info if CONFIG_LOCKDEP is on
+ ftrace: print ftrace buffer
+ all_bt: print all CPUs backtrace (if available in the arch)
+ blocked_tasks: print only tasks in uninterruptible (blocked) state
+
+ This is a human readable alternative to the 'panic_print' option.
+
+ panic_console_replay
+ When panic happens, replay all kernel messages on
+ consoles at the end of panic.
+
parkbd.port= [HW] Parallel port number the keyboard adapter is
connected to, default is 0.
Format: <parport#>
@@ -4313,7 +4913,7 @@
pcbit= [HW,ISDN]
- pci=option[,option...] [PCI] various PCI subsystem options.
+ pci=option[,option...] [PCI,EARLY] various PCI subsystem options.
Some options herein operate on a specific device
or a set of devices (<pci_dev>). These are
@@ -4539,14 +5139,51 @@
bridges without forcing it upstream. Note:
this removes isolation between devices and
may put more devices in an IOMMU group.
+ config_acs=
+ Format:
+ <ACS flags>@<pci_dev>[; ...]
+ Specify one or more PCI devices (in the format
+ specified above) optionally prepended with flags
+ and separated by semicolons. The respective
+ capabilities will be enabled, disabled or
+ unchanged based on what is specified in
+ flags.
+
+ ACS Flags is defined as follows:
+ bit-0 : ACS Source Validation
+ bit-1 : ACS Translation Blocking
+ bit-2 : ACS P2P Request Redirect
+ bit-3 : ACS P2P Completion Redirect
+ bit-4 : ACS Upstream Forwarding
+ bit-5 : ACS P2P Egress Control
+ bit-6 : ACS Direct Translated P2P
+ Each bit can be marked as:
+ '0' – force disabled
+ '1' – force enabled
+ 'x' – unchanged
+ For example,
+ pci=config_acs=10x@pci:0:0
+ would configure all devices that support
+ ACS to enable P2P Request Redirect, disable
+ Translation Blocking, and leave Source
+ Validation unchanged from whatever power-up
+ or firmware set it to.
+
+ Note: this may remove isolation between devices
+ and may put more devices in an IOMMU group.
force_floating [S390] Force usage of floating interrupts.
nomio [S390] Do not use MIO instructions.
norid [S390] ignore the RID field and force use of
one PCI domain per PCI function
+ notph [PCIE] If the PCIE_TPH kernel config parameter
+ is enabled, this kernel boot option can be used
+ to disable PCIe TLP Processing Hints support
+ system-wide.
- pcie_aspm= [PCIE] Forcibly enable or disable PCIe Active State Power
+ pcie_aspm= [PCIE] Forcibly enable or ignore PCIe Active State Power
Management.
- off Disable ASPM.
+ off Don't touch ASPM configuration at all. Leave any
+ configuration done by firmware unchanged.
force Enable ASPM even on devices that claim not to support it.
WARNING: Forcing ASPM on may cause system lockups.
@@ -4582,7 +5219,8 @@
Format: { 0 | 1 }
See arch/parisc/kernel/pdc_chassis.c
- percpu_alloc= Select which percpu first chunk allocator to use.
+ percpu_alloc= [MM,EARLY]
+ Select which percpu first chunk allocator to use.
Currently supported values are "embed" and "page".
Archs may support subset or none of the selections.
See comments in mm/percpu.c for details on each
@@ -4608,6 +5246,18 @@
that number, otherwise (e.g., 'pmu_override=on'), MMCR1
remains 0.
+ pm_async= [PM]
+ Format: off
+ This parameter sets the initial value of the
+ /sys/power/pm_async sysfs knob at boot time.
+ If set to "off", disables asynchronous suspend and
+ resume of devices during system-wide power transitions.
+ This can be useful on platforms where device
+ dependencies are not well-defined, or for debugging
+ power management issues. Asynchronous operations are
+ enabled by default.
+
+
pm_debug_messages [SUSPEND,KNL]
Enable suspend/resume debug messages during boot up.
@@ -4644,6 +5294,11 @@
may be specified.
Format: <port>,<port>....
+ possible_cpus= [SMP,S390,X86]
+ Format: <unsigned int>
+ Set the number of possible CPUs, overriding the
+ regular discovery mechanisms (such as ACPI/FW, etc).
+
powersave=off [PPC] This option disables power saving features.
It specifically disables cpuidle and sets the
platform machine description specific power_save
@@ -4651,12 +5306,12 @@
execution priority.
ppc_strict_facility_enable
- [PPC] This option catches any kernel floating point,
+ [PPC,ENABLE] This option catches any kernel floating point,
Altivec, VSX and SPE outside of regions specifically
allowed (eg kernel_enable_fpu()/kernel_disable_fpu()).
There is some performance impact when enabling this.
- ppc_tm= [PPC]
+ ppc_tm= [PPC,EARLY]
Format: {"off"}
Disable Hardware Transactional Memory
@@ -4665,7 +5320,14 @@
none - Limited to cond_resched() calls
voluntary - Limited to cond_resched() and might_sleep() calls
full - Any section that isn't explicitly preempt disabled
- can be preempted anytime.
+ can be preempted anytime. Tasks will also yield
+ contended spinlocks (if the critical section isn't
+ explicitly preempt disabled beyond the lock itself).
+ lazy - Scheduler controlled. Similar to full but instead
+ of preempting the task immediately, the task gets
+ one HZ tick time to yield itself before the
+ preemption will be forced. One preemption is when the
+ task returns to user space.
print-fatal-signals=
[KNL] debug: print fatal signals
@@ -4695,6 +5357,14 @@
Format: <bool>
default: 0 (auto_verbose is enabled)
+ printk.debug_non_panic_cpus=
+ Allows storing messages from non-panic CPUs into
+ the printk log buffer during panic(). They are
+ flushed to consoles by the panic-CPU on
+ a best-effort basis.
+ Format: <bool> (1/Y/y=enable, 0/N/n=disable)
+ Default: disabled
+
printk.devkmsg={on,off,ratelimit}
Control writing to /dev/kmsg.
on - unlimited logging to /dev/kmsg from userspace
@@ -4705,6 +5375,16 @@
printk.time= Show timing data prefixed to each printk message line
Format: <bool> (1/Y/y=enable, 0/N/n=disable)
+ proc_mem.force_override= [KNL]
+ Format: {always | ptrace | never}
+ Traditionally /proc/pid/mem allows memory permissions to be
+ overridden without restrictions. This option may be set to
+ restrict that. Can be one of:
+ - 'always': traditional behavior always allows mem overrides.
+ - 'ptrace': only allow mem overrides for active ptracers.
+ - 'never': never allow mem overrides.
+ If not specified, default is the CONFIG_PROC_MEM_* choice.
+
processor.max_cstate= [HW,ACPI]
Limit processor to maximum C-state
max_cstate=9 overrides any DMI blacklist limit.
@@ -4715,11 +5395,9 @@
profile= [KNL] Enable kernel profiling via /proc/profile
Format: [<profiletype>,]<number>
- Param: <profiletype>: "schedule", "sleep", or "kvm"
+ Param: <profiletype>: "schedule" or "kvm"
[defaults to kernel profiling]
Param: "schedule" - profile schedule points.
- Param: "sleep" - profile D-state sleeping (millisecs).
- Requires CONFIG_SCHEDSTATS
Param: "kvm" - profile VM exits.
Param: <number> - step/bucket size as a power of 2 for
statistical time based profiling.
@@ -4728,7 +5406,9 @@
prot_virt= [S390] enable hosting protected virtual machines
isolated from the hypervisor (if hardware supports
- that).
+ that). If enabled, the default kernel base address
+ might be overridden even when Kernel Address Space
+ Layout Randomization is disabled.
Format: <bool>
psi= [KNL] Enable or disable pressure stall information
@@ -4766,7 +5446,7 @@
[KNL] Number of legacy pty's. Overwrites compiled-in
default number.
- quiet [KNL] Disable most log messages
+ quiet [KNL,EARLY] Disable most log messages
r128= [HW,DRM]
@@ -4783,17 +5463,17 @@
ramdisk_start= [RAM] RAM disk image start address
random.trust_cpu=off
- [KNL] Disable trusting the use of the CPU's
+ [KNL,EARLY] Disable trusting the use of the CPU's
random number generator (if available) to
initialize the kernel's RNG.
random.trust_bootloader=off
- [KNL] Disable trusting the use of the a seed
+ [KNL,EARLY] Disable trusting the use of the a seed
passed by the bootloader (if available) to
initialize the kernel's RNG.
randomize_kstack_offset=
- [KNL] Enable or disable kernel stack offset
+ [KNL,EARLY] Enable or disable kernel stack offset
randomization, which provides roughly 5 bits of
entropy, frustrating memory corruption attacks
that depend on stack address determinism or
@@ -4852,6 +5532,10 @@
Set maximum number of finished RCU callbacks to
process in one batch.
+ rcutree.csd_lock_suppress_rcu_stall= [KNL]
+ Do only a one-line RCU CPU stall warning when
+ there is an ongoing too-long CSD-lock wait.
+
rcutree.do_rcu_barrier= [KNL]
Request a call to rcu_barrier(). This is
throttled so that userspace tests can safely
@@ -4929,6 +5613,14 @@
the ->nocb_bypass queue. The definition of "too
many" is supplied by this kernel boot parameter.
+ rcutree.nohz_full_patience_delay= [KNL]
+ On callback-offloaded (rcu_nocbs) CPUs, avoid
+ disturbing RCU unless the grace period has
+ reached the specified age in milliseconds.
+ Defaults to zero. Large values will be capped
+ at five seconds. All values will be rounded down
+ to the nearest value representable by jiffies.
+
rcutree.qhimark= [KNL]
Set threshold of queued RCU callbacks beyond which
batch limiting is disabled.
@@ -5034,6 +5726,26 @@
this kernel boot parameter, forcibly setting it
to zero.
+ rcutree.enable_rcu_lazy= [KNL]
+ To save power, batch RCU callbacks and flush after
+ delay, memory pressure or callback list growing too
+ big.
+
+ rcutree.rcu_normal_wake_from_gp= [KNL]
+ Reduces a latency of synchronize_rcu() call. This approach
+ maintains its own track of synchronize_rcu() callers, so it
+ does not interact with regular callbacks because it does not
+ use a call_rcu[_hurry]() path. Please note, this is for a
+ normal grace period.
+
+ How to enable it:
+
+ echo 1 > /sys/module/rcutree/parameters/rcu_normal_wake_from_gp
+ or pass a boot parameter "rcutree.rcu_normal_wake_from_gp=1"
+
+ Default is 1 if num_possible_cpus() <= 16 and it is not explicitly
+ disabled by the boot parameter passing 0.
+
rcuscale.gp_async= [KNL]
Measure performance of asynchronous
grace-period primitives such as call_rcu().
@@ -5165,7 +5877,42 @@
rcutorture.gp_cond= [KNL]
Use conditional/asynchronous update-side
- primitives, if available.
+ normal-grace-period primitives, if available.
+
+ rcutorture.gp_cond_exp= [KNL]
+ Use conditional/asynchronous update-side
+ expedited-grace-period primitives, if available.
+
+ rcutorture.gp_cond_full= [KNL]
+ Use conditional/asynchronous update-side
+ normal-grace-period primitives that also take
+ concurrent expedited grace periods into account,
+ if available.
+
+ rcutorture.gp_cond_exp_full= [KNL]
+ Use conditional/asynchronous update-side
+ expedited-grace-period primitives that also take
+ concurrent normal grace periods into account,
+ if available.
+
+ rcutorture.gp_cond_wi= [KNL]
+ Nominal wait interval for normal conditional
+ grace periods (specified by rcutorture's
+ gp_cond and gp_cond_full module parameters),
+ in microseconds. The actual wait interval will
+ be randomly selected to nanosecond granularity up
+ to this wait interval. Defaults to 16 jiffies,
+ for example, 16,000 microseconds on a system
+ with HZ=1000.
+
+ rcutorture.gp_cond_wi_exp= [KNL]
+ Nominal wait interval for expedited conditional
+ grace periods (specified by rcutorture's
+ gp_cond_exp and gp_cond_exp_full module
+ parameters), in microseconds. The actual wait
+ interval will be randomly selected to nanosecond
+ granularity up to this wait interval. Defaults to
+ 128 microseconds.
rcutorture.gp_exp= [KNL]
Use expedited update-side primitives, if available.
@@ -5174,6 +5921,43 @@
Use normal (non-expedited) asynchronous
update-side primitives, if available.
+ rcutorture.gp_poll= [KNL]
+ Use polled update-side normal-grace-period
+ primitives, if available.
+
+ rcutorture.gp_poll_exp= [KNL]
+ Use polled update-side expedited-grace-period
+ primitives, if available.
+
+ rcutorture.gp_poll_full= [KNL]
+ Use polled update-side normal-grace-period
+ primitives that also take concurrent expedited
+ grace periods into account, if available.
+
+ rcutorture.gp_poll_exp_full= [KNL]
+ Use polled update-side expedited-grace-period
+ primitives that also take concurrent normal
+ grace periods into account, if available.
+
+ rcutorture.gp_poll_wi= [KNL]
+ Nominal wait interval for normal conditional
+ grace periods (specified by rcutorture's
+ gp_poll and gp_poll_full module parameters),
+ in microseconds. The actual wait interval will
+ be randomly selected to nanosecond granularity up
+ to this wait interval. Defaults to 16 jiffies,
+ for example, 16,000 microseconds on a system
+ with HZ=1000.
+
+ rcutorture.gp_poll_wi_exp= [KNL]
+ Nominal wait interval for expedited conditional
+ grace periods (specified by rcutorture's
+ gp_poll_exp and gp_poll_exp_full module
+ parameters), in microseconds. The actual wait
+ interval will be randomly selected to nanosecond
+ granularity up to this wait interval. Defaults to
+ 128 microseconds.
+
rcutorture.gp_sync= [KNL]
Use normal (non-expedited) synchronous
update-side primitives, if available. If all
@@ -5182,6 +5966,31 @@
are zero, rcutorture acts as if is interpreted
they are all non-zero.
+ rcutorture.gpwrap_lag= [KNL]
+ Enable grace-period wrap lag testing. Setting
+ to false prevents the gpwrap lag test from
+ running. Default is true.
+
+ rcutorture.gpwrap_lag_gps= [KNL]
+ Set the value for grace-period wrap lag during
+ active lag testing periods. This controls how many
+ grace periods differences we tolerate between
+ rdp and rnp's gp_seq before setting overflow flag.
+ The default is always set to 8.
+
+ rcutorture.gpwrap_lag_cycle_mins= [KNL]
+ Set the total cycle duration for gpwrap lag
+ testing in minutes. This is the total time for
+ one complete cycle of active and inactive
+ testing periods. Default is 30 minutes.
+
+ rcutorture.gpwrap_lag_active_mins= [KNL]
+ Set the duration for which gpwrap lag is active
+ within each cycle, in minutes. During this time,
+ the grace-period wrap lag will be set to the
+ value specified by gpwrap_lag_gps. Default is
+ 5 minutes.
+
rcutorture.irqreader= [KNL]
Run RCU readers from irq handlers, or, more
accurately, from a timer handler. Not all RCU
@@ -5227,10 +6036,21 @@
Set time (jiffies) between CPU-hotplug operations,
or zero to disable CPU-hotplug testing.
- rcutorture.read_exit= [KNL]
- Set the number of read-then-exit kthreads used
- to test the interaction of RCU updaters and
- task-exit processing.
+ rcutorture.preempt_duration= [KNL]
+ Set duration (in milliseconds) of preemptions
+ by a high-priority FIFO real-time task. Set to
+ zero (the default) to disable. The CPUs to
+ preempt are selected randomly from the set that
+ are online at a given point in time. Races with
+ CPUs going offline are ignored, with that attempt
+ at preemption skipped.
+
+ rcutorture.preempt_interval= [KNL]
+ Set interval (in milliseconds, defaulting to one
+ second) between preemptions by a high-priority
+ FIFO real-time task. This delay is mediated
+ by an hrtimer and is further fuzzed to avoid
+ inadvertent synchronizations.
rcutorture.read_exit_burst= [KNL]
The number of times in a given read-then-exit
@@ -5241,6 +6061,14 @@
The delay, in seconds, between successive
read-then-exit testing episodes.
+ rcutorture.reader_flavor= [KNL]
+ A bit mask indicating which readers to use.
+ If there is more than one bit set, the readers
+ are entered from low-order bit up, and are
+ exited in the opposite order. For SRCU, the
+ 0x1 bit is normal readers, 0x2 NMI-safe readers,
+ and 0x4 light-weight readers.
+
rcutorture.shuffle_interval= [KNL]
Set task-shuffle interval (s). Shuffling tasks
allows some CPUs to go into dyntick-idle mode
@@ -5272,7 +6100,13 @@
Time to wait (s) after boot before inducing stall.
rcutorture.stall_cpu_irqsoff= [KNL]
- Disable interrupts while stalling if set.
+ Disable interrupts while stalling if set, but only
+ on the first stall in the set.
+
+ rcutorture.stall_cpu_repeat= [KNL]
+ Number of times to repeat the stall sequence,
+ so that rcutorture.stall_cpu_repeat=3 will result
+ in four stall sequences.
rcutorture.stall_gp_kthread= [KNL]
Duration (s) of forced sleep within RCU
@@ -5298,6 +6132,11 @@
rcutorture.test_boost_duration= [KNL]
Duration (s) of each individual boost test.
+ rcutorture.test_boost_holdoff= [KNL]
+ Holdoff time (s) from start of test to the start
+ of RCU priority-boost testing. Defaults to zero,
+ that is, no holdoff.
+
rcutorture.test_boost_interval= [KNL]
Interval (s) between each boost test.
@@ -5460,14 +6299,6 @@
of zero will disable batching. Batching is
always disabled for synchronize_rcu_tasks().
- rcupdate.rcu_tasks_rude_lazy_ms= [KNL]
- Set timeout in milliseconds RCU Tasks
- Rude asynchronous callback batching for
- call_rcu_tasks_rude(). A negative value
- will take the default. A value of zero will
- disable batching. Batching is always disabled
- for synchronize_rcu_tasks_rude().
-
rcupdate.rcu_tasks_trace_lazy_ms= [KNL]
Set timeout in milliseconds RCU Tasks
Trace asynchronous callback batching for
@@ -5484,7 +6315,7 @@
Run specified binary instead of /init from the ramdisk,
used for early userspace startup. See initrd.
- rdrand= [X86]
+ rdrand= [X86,EARLY]
force - Override the decision by the kernel to hide the
advertisement of RDRAND support (this affects
certain AMD processors because of buggy BIOS
@@ -5494,7 +6325,7 @@
rdt= [HW,X86,RDT]
Turn on/off individual RDT features. List is:
cmt, mbmtotal, mbmlocal, l3cat, l3cdp, l2cat, l2cdp,
- mba, smba, bmec.
+ mba, smba, bmec, abmc, sdciae.
E.g. to turn on cmt and turn off mba use:
rdt=cmt,!mba
@@ -5512,6 +6343,55 @@
reboot_cpu is s[mp]#### with #### being the processor
to be used for rebooting.
+ acpi
+ Use the ACPI RESET_REG in the FADT. If ACPI is not
+ configured or the ACPI reset does not work, the reboot
+ path attempts the reset using the keyboard controller.
+
+ bios
+ Use the CPU reboot vector for warm reset
+
+ cold
+ Set the cold reboot flag
+
+ default
+ There are some built-in platform specific "quirks"
+ - you may see: "reboot: <name> series board detected.
+ Selecting <type> for reboots." In the case where you
+ think the quirk is in error (e.g. you have newer BIOS,
+ or newer board) using this option will ignore the
+ built-in quirk table, and use the generic default
+ reboot actions.
+
+ efi
+ Use efi reset_system runtime service. If EFI is not
+ configured or the EFI reset does not work, the reboot
+ path attempts the reset using the keyboard controller.
+
+ force
+ Don't stop other CPUs on reboot. This can make reboot
+ more reliable in some cases.
+
+ kbd
+ Use the keyboard controller. cold reset (default)
+
+ pci
+ Use a write to the PCI config space register 0xcf9 to
+ trigger reboot.
+
+ triple
+ Force a triple fault (init)
+
+ warm
+ Don't set the cold reboot flag
+
+ Using warm reset will be much faster especially on big
+ memory systems because the BIOS will not go through
+ the memory check. Disadvantage is that not all
+ hardware will be completely reinitialized on reboot so
+ there may be boot problems on some systems.
+
+
refscale.holdoff= [KNL]
Set test-start holdoff period. The purpose of
this parameter is to delay the start of the
@@ -5580,7 +6460,29 @@
them. If <base> is less than 0x10000, the region
is assumed to be I/O ports; otherwise it is memory.
- reservetop= [X86-32]
+ reserve_mem= [RAM]
+ Format: nn[KMG]:<align>:<label>
+ Reserve physical memory and label it with a name that
+ other subsystems can use to access it. This is typically
+ used for systems that do not wipe the RAM, and this command
+ line will try to reserve the same physical memory on
+ soft reboots. Note, it is not guaranteed to be the same
+ location. For example, if anything about the system changes
+ or if booting a different kernel. It can also fail if KASLR
+ places the kernel at the location of where the RAM reservation
+ was from a previous boot, the new reservation will be at a
+ different location.
+ Any subsystem using this feature must add a way to verify
+ that the contents of the physical memory is from a previous
+ boot, as there may be cases where the memory will not be
+ located at the same location.
+
+ The format is size:align:label for example, to request
+ 12 megabytes of 4096 alignment for ramoops:
+
+ reserve_mem=12M:4096:oops ramoops.mem_name=oops
+
+ reservetop= [X86-32,EARLY]
Format: nn[KMG]
Reserves a hole at the top of the kernel virtual
address space.
@@ -5620,7 +6522,7 @@
that don't.
off - no mitigation
- auto - automatically select a migitation
+ auto - automatically select a mitigation
auto,nosmt - automatically select a mitigation,
disabling SMT if necessary for
the full mitigation (only on Zen1
@@ -5658,14 +6560,11 @@
2 The "airplane mode" button toggles between everything
blocked and everything unblocked.
- rhash_entries= [KNL,NET]
- Set number of hash buckets for route cache
-
ring3mwait=disable
[KNL] Disable ring 3 MONITOR/MWAIT feature on supported
CPUs.
- riscv_isa_fallback [RISCV]
+ riscv_isa_fallback [RISCV,EARLY]
When CONFIG_RISCV_ISA_FALLBACK is not enabled, permit
falling back to detecting extension support by parsing
"riscv,isa" property on devicetree systems when the
@@ -5674,20 +6573,22 @@
ro [KNL] Mount root device read-only on boot
- rodata= [KNL]
+ rodata= [KNL,EARLY]
on Mark read-only kernel memory as read-only (default).
off Leave read-only kernel memory writable for debugging.
- full Mark read-only kernel memory and aliases as read-only
- [arm64]
+ noalias Mark read-only kernel memory as read-only but retain
+ writable aliases in the direct map for regions outside
+ of the kernel image. [arm64]
rockchip.usb_uart
+ [EARLY]
Enable the uart passthrough on the designated usb port
on Rockchip SoCs. When active, the signals of the
debug-uart get routed to the D+ and D- pins of the usb
port and the regular usb controller gets disabled.
root= [KNL] Root filesystem
- Usually this a a block device specifier of some kind,
+ Usually this is a block device specifier of some kind,
see the early_lookup_bdev comment in
block/early-lookup.c for details.
Alternatively this can be "ram" for the legacy initial
@@ -5699,6 +6600,9 @@
rootflags= [KNL] Set root filesystem mount option string
+ initramfs_options= [KNL]
+ Specify mount options for for the initramfs mount.
+
rootfstype= [KNL] Set root filesystem type
rootwait [KNL] Wait (indefinitely) for root device to show up.
@@ -5714,6 +6618,15 @@
Memory area to be used by remote processor image,
managed by CMA.
+ rseq_debug= [KNL] Enable or disable restartable sequence
+ debug mode. Defaults to CONFIG_RSEQ_DEBUG_DEFAULT_ENABLE.
+ Format: <bool>
+
+ rt_group_sched= [KNL] Enable or disable SCHED_RR/FIFO group scheduling
+ when CONFIG_RT_GROUP_SCHED=y. Defaults to
+ !CONFIG_RT_GROUP_SCHED_DEFAULT_DISABLED.
+ Format: <bool>
+
rw [KNL] Mount root device read-write on boot
S [KNL] Run init in single mode
@@ -5741,7 +6654,12 @@
sa1100ir [NET]
See drivers/net/irda/sa1100_ir.c.
- sched_verbose [KNL] Enables verbose scheduler debug messages.
+ sched_proxy_exec= [KNL]
+ Enables or disables "proxy execution" style
+ solution to mutex-based priority inversion.
+ Format: <bool>
+
+ sched_verbose [KNL,EARLY] Enables verbose scheduler debug messages.
schedstats= [KNL,X86] Enable or disable scheduled statistics.
Allowed values are enable and disable. This feature
@@ -5749,6 +6667,7 @@
but is useful for debugging and performance tuning.
sched_thermal_decay_shift=
+ [Deprecated]
[KNL, SMP] Set a decay shift for scheduler thermal
pressure signal. Thermal pressure signal follows the
default decay period of other scheduler pelt
@@ -5856,7 +6775,11 @@
non-zero "wait" parameter. See weight_single
and weight_many.
- skew_tick= [KNL] Offset the periodic timer tick per cpu to mitigate
+ sdw_mclk_divider=[SDW]
+ Specify the MCLK divider for Intel SoundWire buses in
+ case the BIOS does not provide the clock rate properly.
+
+ skew_tick= [KNL,EARLY] Offset the periodic timer tick per cpu to mitigate
xtime_lock contention on larger systems, and/or RCU lock
contention on all systems with CONFIG_MAXSMP set.
Format: { "0" | "1" }
@@ -5878,7 +6801,16 @@
serialnumber [BUGS=X86-32]
- sev=option[,option...] [X86-64] See Documentation/arch/x86/x86_64/boot-options.rst
+ sev=option[,option...] [X86-64]
+
+ debug
+ Enable debug messages.
+
+ nosnp
+ Do not enable SEV-SNP (applies to host/hypervisor
+ only). Setting 'nosnp' avoids the RMP check overhead
+ in memory accesses when users do not want to run
+ SEV-SNP guests.
shapers= [NET]
Maximal number of shapers.
@@ -5892,14 +6824,47 @@
apic=verbose is specified.
Example: apic=debug show_lapic=all
- simeth= [IA-64]
- simscsi=
+ slab_debug[=options[,slabs][;[options[,slabs]]...] [MM]
+ Enabling slab_debug allows one to determine the
+ culprit if slab objects become corrupted. Enabling
+ slab_debug can create guard zones around objects and
+ may poison objects when not in use. Also tracks the
+ last alloc / free. For more information see
+ Documentation/admin-guide/mm/slab.rst.
+ (slub_debug legacy name also accepted for now)
- slram= [HW,MTD]
+ Using this option implies the "no_hash_pointers"
+ option which can be undone by adding the
+ "hash_pointers=always" option.
+
+ slab_max_order= [MM]
+ Determines the maximum allowed order for slabs.
+ A high setting may cause OOMs due to memory
+ fragmentation. For more information see
+ Documentation/admin-guide/mm/slab.rst.
+ (slub_max_order legacy name also accepted for now)
slab_merge [MM]
Enable merging of slabs with similar size when the
kernel is built without CONFIG_SLAB_MERGE_DEFAULT.
+ (slub_merge legacy name also accepted for now)
+
+ slab_min_objects= [MM]
+ The minimum number of objects per slab. SLUB will
+ increase the slab order up to slab_max_order to
+ generate a sufficiently large slab able to contain
+ the number of objects indicated. The higher the number
+ of objects the smaller the overhead of tracking slabs
+ and the less frequently locks need to be acquired.
+ For more information see
+ Documentation/admin-guide/mm/slab.rst.
+ (slub_min_objects legacy name also accepted for now)
+
+ slab_min_order= [MM]
+ Determines the minimum page order for slabs. Must be
+ lower or equal to slab_max_order. For more information see
+ Documentation/admin-guide/mm/slab.rst.
+ (slub_min_order legacy name also accepted for now)
slab_nomerge [MM]
Disable merging of slabs with similar size. May be
@@ -5912,48 +6877,21 @@
cache (risks via metadata attacks are mostly
unchanged). Debug options disable merging on their
own.
- For more information see Documentation/mm/slub.rst.
+ For more information see
+ Documentation/admin-guide/mm/slab.rst.
+ (slub_nomerge legacy name also accepted for now)
+
+ slab_strict_numa [MM]
+ Support memory policies on a per object level
+ in the slab allocator. The default is for memory
+ policies to be applied at the folio level when
+ a new folio is needed or a partial folio is
+ retrieved from the lists. Increases overhead
+ in the slab fastpaths but gains more accurate
+ NUMA kernel object placement which helps with slow
+ interconnects in NUMA systems.
- slab_max_order= [MM, SLAB]
- Determines the maximum allowed order for slabs.
- A high setting may cause OOMs due to memory
- fragmentation. Defaults to 1 for systems with
- more than 32MB of RAM, 0 otherwise.
-
- slub_debug[=options[,slabs][;[options[,slabs]]...] [MM, SLUB]
- Enabling slub_debug allows one to determine the
- culprit if slab objects become corrupted. Enabling
- slub_debug can create guard zones around objects and
- may poison objects when not in use. Also tracks the
- last alloc / free. For more information see
- Documentation/mm/slub.rst.
-
- slub_max_order= [MM, SLUB]
- Determines the maximum allowed order for slabs.
- A high setting may cause OOMs due to memory
- fragmentation. For more information see
- Documentation/mm/slub.rst.
-
- slub_min_objects= [MM, SLUB]
- The minimum number of objects per slab. SLUB will
- increase the slab order up to slub_max_order to
- generate a sufficiently large slab able to contain
- the number of objects indicated. The higher the number
- of objects the smaller the overhead of tracking slabs
- and the less frequently locks need to be acquired.
- For more information see Documentation/mm/slub.rst.
-
- slub_min_order= [MM, SLUB]
- Determines the minimum page order for slabs. Must be
- lower than slub_max_order.
- For more information see Documentation/mm/slub.rst.
-
- slub_merge [MM, SLUB]
- Same with slab_merge.
-
- slub_nomerge [MM, SLUB]
- Same with slab_nomerge. This is supported for legacy.
- See slab_nomerge for more information.
+ slram= [HW,MTD]
smart2= [HW]
Format: <io1>[,<io2>[,...,<io8>]]
@@ -5987,10 +6925,10 @@
1: Fast pin select (default)
2: ATC IRMode
- smt= [KNL,MIPS,S390] Set the maximum number of threads (logical
- CPUs) to use per physical CPU on systems capable of
- symmetric multithreading (SMT). Will be capped to the
- actual hardware limit.
+ smt= [KNL,MIPS,S390,EARLY] Set the maximum number of threads
+ (logical CPUs) to use per physical CPU on systems
+ capable of symmetric multithreading (SMT). Will
+ be capped to the actual hardware limit.
Format: <integer>
Default: -1 (no limit)
@@ -6012,7 +6950,22 @@
sonypi.*= [HW] Sony Programmable I/O Control Device driver
See Documentation/admin-guide/laptops/sonypi.rst
- spectre_v2= [X86] Control mitigation of Spectre variant 2
+ spectre_bhi= [X86] Control mitigation of Branch History Injection
+ (BHI) vulnerability. This setting affects the
+ deployment of the HW BHI control and the SW BHB
+ clearing sequence.
+
+ on - (default) Enable the HW or SW mitigation as
+ needed. This protects the kernel from
+ both syscalls and VMs.
+ vmexit - On systems which don't have the HW mitigation
+ available, enable the SW mitigation on vmexit
+ ONLY. On such systems, the host kernel is
+ protected from VM-originated BHI attacks, but
+ may still be vulnerable to syscall attacks.
+ off - Disable the mitigation.
+
+ spectre_v2= [X86,EARLY] Control mitigation of Spectre variant 2
(indirect branch speculation) vulnerability.
The default operation protects the kernel from
user space attacks.
@@ -6027,11 +6980,13 @@
Selecting 'on' will, and 'auto' may, choose a
mitigation method at run time according to the
CPU, the available microcode, the setting of the
- CONFIG_RETPOLINE configuration option, and the
- compiler with which the kernel was built.
+ CONFIG_MITIGATION_RETPOLINE configuration option,
+ and the compiler with which the kernel was built.
Selecting 'on' will also enable the mitigation
against user space to user space task attacks.
+ Selecting specific mitigation does not force enable
+ user mitigations.
Selecting 'off' will disable both the kernel and
the user space protections.
@@ -6092,7 +7047,7 @@
spectre_v2_user=auto.
spec_rstack_overflow=
- [X86] Control RAS overflow mitigation on AMD Zen CPUs
+ [X86,EARLY] Control RAS overflow mitigation on AMD Zen CPUs
off - Disable mitigation
microcode - Enable microcode mitigation only
@@ -6103,7 +7058,7 @@
(cloud-specific mitigation)
spec_store_bypass_disable=
- [HW] Control Speculative Store Bypass (SSB) Disable mitigation
+ [HW,EARLY] Control Speculative Store Bypass (SSB) Disable mitigation
(Speculative Store Bypass vulnerability)
Certain CPUs are vulnerable to an exploit against a
@@ -6154,11 +7109,6 @@
Not specifying this option is equivalent to
spec_store_bypass_disable=auto.
- spia_io_base= [HW,MTD]
- spia_fio_base=
- spia_pedr=
- spia_peddr=
-
split_lock_detect=
[X86] Enable split lock detection or bus lock detection
@@ -6199,7 +7149,7 @@
#DB exception for bus lock is triggered only when
CPL > 0.
- srbds= [X86,INTEL]
+ srbds= [X86,INTEL,EARLY]
Control the Special Register Buffer Data Sampling
(SRBDS) mitigation.
@@ -6286,7 +7236,7 @@
srcutree.convert_to_big must have the 0x10 bit
set for contention-based conversions to occur.
- ssbd= [ARM64,HW]
+ ssbd= [ARM64,HW,EARLY]
Speculative Store Bypass Disable control
On CPUs that are vulnerable to the Speculative
@@ -6310,14 +7260,19 @@
growing up) the main stack are reserved for no other
mapping. Default value is 256 pages.
- stack_depot_disable= [KNL]
+ stack_depot_disable= [KNL,EARLY]
Setting this to true through kernel command line will
disable the stack depot thereby saving the static memory
consumed by the stack hash table. By default this is set
to false.
+ stack_depot_max_pools= [KNL,EARLY]
+ Specify the maximum number of pools to use for storing
+ stack traces. Pools are allocated on-demand up to this
+ limit. Default value is 8191 pools.
+
stacktrace [FTRACE]
- Enabled the stack tracer on boot up.
+ Enable the stack tracer on boot up.
stacktrace_filter=[function-list]
[FTRACE] Limit the functions that the stack tracer
@@ -6349,16 +7304,19 @@
be used to filter out binaries which have
not yet been made aware of AT_MINSIGSTKSZ.
- stress_hpt [PPC]
+ stress_hpt [PPC,EARLY]
Limits the number of kernel HPT entries in the hash
page table to increase the rate of hash page table
faults on kernel addresses.
- stress_slb [PPC]
+ stress_slb [PPC,EARLY]
Limits the number of kernel SLB entries, and flushes
them frequently to increase the rate of SLB faults
on kernel addresses.
+ no_slb_preload [PPC,EARLY]
+ Disables slb preloading for userspace.
+
sunrpc.min_resvport=
sunrpc.max_resvport=
[NFS,SUNRPC]
@@ -6414,7 +7372,7 @@
This parameter controls use of the Protected
Execution Facility on pSeries.
- swiotlb= [ARM,IA-64,PPC,MIPS,X86]
+ swiotlb= [ARM,PPC,MIPS,X86,S390,EARLY]
Format: { <int> [,<int>] | force | noforce }
<int> -- Number of I/O TLB slabs
<int> -- Second integer after comma. Number of swiotlb
@@ -6424,7 +7382,7 @@
wouldn't be automatically used by the kernel
noforce -- Never use bounce buffers (for debugging)
- switches= [HW,M68k]
+ switches= [HW,M68k,EARLY]
sysctl.*= [KNL]
Set a sysctl parameter, right before loading the init
@@ -6483,11 +7441,30 @@
<deci-seconds>: poll all this frequency
0: no polling (default)
- threadirqs [KNL]
+ thp_anon= [KNL]
+ Format: <size>[KMG],<size>[KMG]:<state>;<size>[KMG]-<size>[KMG]:<state>
+ state is one of "always", "madvise", "never" or "inherit".
+ Control the default behavior of the system with respect
+ to anonymous transparent hugepages.
+ Can be used multiple times for multiple anon THP sizes.
+ See Documentation/admin-guide/mm/transhuge.rst for more
+ details.
+
+ threadirqs [KNL,EARLY]
Force threading of all interrupt handlers except those
marked explicitly IRQF_NO_THREAD.
- topology= [S390]
+ thp_shmem= [KNL]
+ Format: <size>[KMG],<size>[KMG]:<policy>;<size>[KMG]-<size>[KMG]:<policy>
+ Control the default policy of each hugepage size for the
+ internal shmem mount. <policy> is one of policies available
+ for the shmem mount ("always", "inherit", "never", "within_size",
+ and "advise").
+ It can be used multiple times for multiple shmem THP sizes.
+ See Documentation/admin-guide/mm/transhuge.rst for more
+ details.
+
+ topology= [S390,EARLY]
Format: {off | on}
Specify if the kernel should make use of the cpu
topology information if the hardware supports this.
@@ -6495,12 +7472,6 @@
e.g. base its process migration decisions on it.
Default is on.
- topology_updates= [KNL, PPC, NUMA]
- Format: {off}
- Specify if the kernel should ignore (off)
- topology updates sent by the hypervisor to this
- LPAR.
-
torture.disable_onoff_at_boot= [KNL]
Prevent the CPU-hotplug component of torturing
until after init has spawned.
@@ -6520,7 +7491,22 @@
torture.verbose_sleep_duration= [KNL]
Duration of each verbose-printk() sleep in jiffies.
- tp720= [HW,PS2]
+ tpm.disable_pcr_integrity= [HW,TPM]
+ Do not protect PCR registers from unintended physical
+ access, or interposers in the bus by the means of
+ having an integrity protected session wrapped around
+ TPM2_PCR_Extend command. Consider this in a situation
+ where TPM is heavily utilized by IMA, thus protection
+ causing a major performance hit, and the space where
+ machines are deployed is by other means guarded.
+
+ tpm_crb_ffa.busy_timeout_ms= [ARM64,TPM]
+ Maximum time in milliseconds to retry sending a message
+ to the TPM service before giving up. This parameter controls
+ how long the system will continue retrying when the TPM
+ service is busy.
+ Format: <unsigned int>
+ Default: 2000 (2 seconds)
tpm_suspend_pcr=[HW,TPM]
Format: integer pcr id
@@ -6548,7 +7534,7 @@
To turn off having tracepoints sent to printk,
echo 0 > /proc/sys/kernel/tracepoint_printk
Note, echoing 1 into this file without the
- tracepoint_printk kernel cmdline option has no effect.
+ tp_printk kernel cmdline option has no effect.
The tp_printk_stop_on_boot (see below) can also be used
to stop the printing of events to console at
@@ -6578,7 +7564,7 @@
(converted into nanoseconds). Fast, but
depending on the architecture, may not be
in sync between CPUs.
- global - Event time stamps are synchronize across
+ global - Event time stamps are synchronized across
CPUs. May be slower than the local clock,
but better for some race conditions.
counter - Simple counting of events (1, 2, ..)
@@ -6600,6 +7586,14 @@
comma-separated list of trace events to enable. See
also Documentation/trace/events.rst
+ To enable modules, use :mod: keyword:
+
+ trace_event=:mod:<module>
+
+ The value before :mod: will only enable specific events
+ that are part of the module. See the above mentioned
+ document for more information.
+
trace_instance=[instance-info]
[FTRACE] Create a ring buffer instance early in boot up.
This will be listed in:
@@ -6620,6 +7614,59 @@
the same thing would happen if it was left off). The irq_handler_entry
event, and all events under the "initcall" system.
+ Flags can be added to the instance to modify its behavior when it is
+ created. The flags are separated by '^'.
+
+ The available flags are:
+
+ traceoff - Have the tracing instance tracing disabled after it is created.
+ traceprintk - Have trace_printk() write into this trace instance
+ (note, "printk" and "trace_printk" can also be used)
+
+ trace_instance=foo^traceoff^traceprintk,sched,irq
+
+ The flags must come before the defined events.
+
+ If memory has been reserved (see memmap for x86), the instance
+ can use that memory:
+
+ memmap=12M$0x284500000 trace_instance=boot_map@0x284500000:12M
+
+ The above will create a "boot_map" instance that uses the physical
+ memory at 0x284500000 that is 12Megs. The per CPU buffers of that
+ instance will be split up accordingly.
+
+ Alternatively, the memory can be reserved by the reserve_mem option:
+
+ reserve_mem=12M:4096:trace trace_instance=boot_map@trace
+
+ This will reserve 12 megabytes at boot up with a 4096 byte alignment
+ and place the ring buffer in this memory. Note that due to KASLR, the
+ memory may not be the same location each time, which will not preserve
+ the buffer content.
+
+ Also note that the layout of the ring buffer data may change between
+ kernel versions where the validator will fail and reset the ring buffer
+ if the layout is not the same as the previous kernel.
+
+ If the ring buffer is used for persistent bootups and has events enabled,
+ it is recommend to disable tracing so that events from a previous boot do not
+ mix with events of the current boot (unless you are debugging a random crash
+ at boot up).
+
+ reserve_mem=12M:4096:trace trace_instance=boot_map^traceoff^traceprintk@trace,sched,irq
+
+ Note, saving the trace buffer across reboots does require that the system
+ is set up to not wipe memory. For instance, CONFIG_RESET_ATTACK_MITIGATION
+ can force a memory reset on boot which will clear any trace that was stored.
+ This is just one of many ways that can clear memory. Make sure your system
+ keeps the content of memory across reboots before relying on this option.
+
+ NB: Both the mapped address and size must be page aligned for the architecture.
+
+ See also Documentation/trace/debugging.rst
+
+
trace_options=[option-list]
[FTRACE] Enable or disable tracer options at boot.
The option-list is a comma delimited list of options
@@ -6637,12 +7684,12 @@
section.
trace_trigger=[trigger-list]
- [FTRACE] Add a event trigger on specific events.
+ [FTRACE] Add an event trigger on specific events.
Set a trigger on top of a specific event, with an optional
filter.
- The format is is "trace_trigger=<event>.<trigger>[ if <filter>],..."
- Where more than one trigger may be specified that are comma deliminated.
+ The format is "trace_trigger=<event>.<trigger>[ if <filter>],..."
+ Where more than one trigger may be specified that are comma delimited.
For example:
@@ -6650,11 +7697,20 @@
The above will enable the "stacktrace" trigger on the "sched_switch"
event but only trigger it if the "prev_state" of the "sched_switch"
- event is "2" (TASK_UNINTERUPTIBLE).
+ event is "2" (TASK_UNINTERRUPTIBLE).
See also "Event triggers" in Documentation/trace/events.rst
+ traceoff_after_boot
+ [FTRACE] Sometimes tracing is used to debug issues
+ during the boot process. Since the trace buffer has a
+ limited amount of storage, it may be prudent to
+ disable tracing after the boot is finished, otherwise
+ the critical information may be overwritten. With this
+ option, the main tracing buffer will be turned off at
+ the end of the boot process.
+
traceoff_on_warning
[FTRACE] enable this option to disable tracing when a
warning is hit. This turns off "tracing_on". Tracing can
@@ -6676,6 +7732,20 @@
See Documentation/admin-guide/mm/transhuge.rst
for more details.
+ transparent_hugepage_shmem= [KNL]
+ Format: [always|within_size|advise|never|deny|force]
+ Can be used to control the hugepage allocation policy for
+ the internal shmem mount.
+ See Documentation/admin-guide/mm/transhuge.rst
+ for more details.
+
+ transparent_hugepage_tmpfs= [KNL]
+ Format: [always|within_size|advise|never]
+ Can be used to control the default hugepage allocation policy
+ for the tmpfs mount.
+ See Documentation/admin-guide/mm/transhuge.rst
+ for more details.
+
trusted.source= [KEYS]
Format: <string>
This parameter identifies the trust source as a backend
@@ -6684,6 +7754,7 @@
- "tpm"
- "tee"
- "caam"
+ - "dcp"
If not specified then it defaults to iterating through
the trust source list starting with TPM and assigns the
first trust source as a backend which is initialized
@@ -6699,6 +7770,31 @@
If not specified, "default" is used. In this case,
the RNG's choice is left to each individual trust source.
+ trusted.dcp_use_otp_key
+ This is intended to be used in combination with
+ trusted.source=dcp and will select the DCP OTP key
+ instead of the DCP UNIQUE key blob encryption.
+
+ trusted.dcp_skip_zk_test
+ This is intended to be used in combination with
+ trusted.source=dcp and will disable the check if the
+ blob key is all zeros. This is helpful for situations where
+ having this key zero'ed is acceptable. E.g. in testing
+ scenarios.
+
+ tsa= [X86] Control mitigation for Transient Scheduler
+ Attacks on AMD CPUs. Search the following in your
+ favourite search engine for more details:
+
+ "Technical guidance for mitigating transient scheduler
+ attacks".
+
+ off - disable the mitigation
+ on - enable the mitigation (default)
+ user - mitigate only user/kernel transitions
+ vm - mitigate only guest/host transitions
+
+
tsc= Disable clocksource stability checks for TSC.
Format: <string>
[x86] reliable: mark tsc clocksource as reliable, this
@@ -6728,7 +7824,7 @@
can be overridden by a later tsc=nowatchdog. A console
message will flag any such suppression or overriding.
- tsc_early_khz= [X86] Skip early TSC calibration and use the given
+ tsc_early_khz= [X86,EARLY] Skip early TSC calibration and use the given
value instead. Useful when the early TSC frequency discovery
procedure is not reliable, such as on overclocked systems
with CPUID.16h support and partial CPUID.15h support.
@@ -6763,7 +7859,7 @@
See Documentation/admin-guide/hw-vuln/tsx_async_abort.rst
for more details.
- tsx_async_abort= [X86,INTEL] Control mitigation for the TSX Async
+ tsx_async_abort= [X86,INTEL,EARLY] Control mitigation for the TSX Async
Abort (TAA) vulnerability.
Similar to Micro-architectural Data Sampling (MDS)
@@ -6826,10 +7922,26 @@
Note that genuine overcurrent events won't be
reported either.
+ unaligned_scalar_speed=
+ [RISCV]
+ Format: {slow | fast | unsupported}
+ Allow skipping scalar unaligned access speed tests. This
+ is useful for testing alternative code paths and to skip
+ the tests in environments where they run too slowly. All
+ CPUs must have the same scalar unaligned access speed.
+
+ unaligned_vector_speed=
+ [RISCV]
+ Format: {slow | fast | unsupported}
+ Allow skipping vector unaligned access speed tests. This
+ is useful for testing alternative code paths and to skip
+ the tests in environments where they run too slowly. All
+ CPUs must have the same vector unaligned access speed.
+
unknown_nmi_panic
[X86] Cause panic on unknown NMI.
- unwind_debug [X86-64]
+ unwind_debug [X86-64,EARLY]
Enable unwinder debug output. This can be
useful for debugging certain unwinder error
conditions, including corrupt stacks and
@@ -6952,6 +8064,9 @@
usb-storage.delay_use=
[UMS] The delay in seconds before a new device is
scanned for Logical Units (default 1).
+ Optionally the delay in milliseconds if the value has
+ suffix with "ms".
+ Example: delay_use=2567ms
usb-storage.quirks=
[UMS] A list of quirks entries to supplement or
@@ -7018,13 +8133,6 @@
16 - SIGBUS faults
Example: user_debug=31
- userpte=
- [X86] Flags controlling user PTE allocations.
-
- nohigh = do not allocate PTE pages in
- HIGHMEM regardless of setting
- of CONFIG_HIGHPTE.
-
vdso= [X86,SH,SPARC]
On X86_32, this is an alias for vdso32=. Otherwise:
@@ -7045,10 +8153,7 @@
Try vdso32=0 if you encounter an error that says:
dl_main: Assertion `(void *) ph->p_vaddr == _rtld_local._dl_sysinfo_dso' failed!
- vector= [IA-64,SMP]
- vector=percpu: enable percpu vector domain
-
- video= [FB] Frame buffer configuration
+ video= [FB,EARLY] Frame buffer configuration
See Documentation/fb/modedb.rst.
video.brightness_switch_enabled= [ACPI]
@@ -7096,13 +8201,16 @@
P Enable page structure init time poisoning
- Disable all of the above options
- vmalloc=nn[KMG] [KNL,BOOT] Forces the vmalloc area to have an exact
- size of <nn>. This can be used to increase the
- minimum size (128MB on x86). It can also be used to
- decrease the size and leave more room for directly
- mapped kernel RAM.
+ vmalloc=nn[KMG] [KNL,BOOT,EARLY] Forces the vmalloc area to have an
+ exact size of <nn>. This can be used to increase
+ the minimum size (128MB on x86, arm32 platforms).
+ It can also be used to decrease the size and leave more room
+ for directly mapped kernel RAM. Note that this parameter does
+ not exist on many other platforms (including arm64, alpha,
+ loongarch, arc, csky, hexagon, microblaze, mips, nios2, openrisc,
+ parisc, m64k, powerpc, riscv, sh, um, xtensa, s390, sparc).
- vmcp_cma=nn[MG] [KNL,S390]
+ vmcp_cma=nn[MG] [KNL,S390,EARLY]
Sets the memory size reserved for contiguous memory
allocations for the vmcp device driver.
@@ -7115,7 +8223,17 @@
vmpoff= [KNL,S390] Perform z/VM CP command after power off.
Format: <command>
- vsyscall= [X86-64]
+ vmscape= [X86] Controls mitigation for VMscape attacks.
+ VMscape attacks can leak information from a userspace
+ hypervisor to a guest via speculative side-channels.
+
+ off - disable the mitigation
+ ibpb - use Indirect Branch Prediction Barrier
+ (IBPB) mitigation (default)
+ force - force vulnerability detection even on
+ unaffected processors
+
+ vsyscall= [X86-64,EARLY]
Controls the behavior of vsyscalls (i.e. calls to
fixed addresses of 0xffffffffff600x00 from legacy
code). Most statically-linked binaries and older
@@ -7142,7 +8260,7 @@
vt.cur_default= [VT] Default cursor shape.
Format: 0xCCBBAA, where AA, BB, and CC are the same as
the parameters of the <Esc>[?A;B;Cc escape sequence;
- see VGA-softcursor.txt. Default: 2 = underline.
+ see vga-softcursor.rst. Default: 2 = underline.
vt.default_blu= [VT]
Format: <blue0>,<blue1>,<blue2>,...,<blue15>
@@ -7213,6 +8331,13 @@
it can be updated at runtime by writing to the
corresponding sysfs file.
+ workqueue.panic_on_stall=<uint>
+ Panic when workqueue stall is detected by
+ CONFIG_WQ_WATCHDOG. It sets the number times of the
+ stall to trigger panic.
+
+ The default is 0, which disables the panic on stall.
+
workqueue.cpu_intensive_thresh_us=
Per-cpu work items which run for longer than this
threshold are automatically considered CPU intensive
@@ -7225,6 +8350,15 @@
threshold repeatedly. They are likely good
candidates for using WQ_UNBOUND workqueues instead.
+ workqueue.cpu_intensive_warning_thresh=<uint>
+ If CONFIG_WQ_CPU_INTENSIVE_REPORT is set, the kernel
+ will report the work functions which violate the
+ intensive_threshold_us repeatedly. In order to prevent
+ spurious warnings, start printing only after a work
+ function has violated this threshold number of times.
+
+ The default is 4 times. 0 disables the warning.
+
workqueue.power_efficient
Per-cpu workqueues are generally preferred because
they show better performance thanks to cache
@@ -7250,7 +8384,7 @@
This can be changed after boot by writing to the
matching /sys/module/workqueue/parameters file. All
workqueues with the "default" affinity scope will be
- updated accordignly.
+ updated accordingly.
workqueue.debug_force_rr_cpu
Workqueue used to implicitly guarantee that work
@@ -7263,13 +8397,13 @@
When enabled, memory and cache locality will be
impacted.
- writecombine= [LOONGARCH] Control the MAT (Memory Access Type) of
- ioremap_wc().
+ writecombine= [LOONGARCH,EARLY] Control the MAT (Memory Access
+ Type) of ioremap_wc().
on - Enable writecombine, use WUC for ioremap_wc()
off - Disable writecombine, use SUC for ioremap_wc()
- x2apic_phys [X86-64,APIC] Use x2apic physical mode instead of
+ x2apic_phys [X86-64,APIC,EARLY] Use x2apic physical mode instead of
default x2apic cluster mode on platforms
supporting x2apic.
@@ -7280,7 +8414,7 @@
save/restore/migration must be enabled to handle larger
domains.
- xen_emul_unplug= [HW,X86,XEN]
+ xen_emul_unplug= [HW,X86,XEN,EARLY]
Unplug Xen emulated devices
Format: [unplug0,][unplug1]
ide-disks -- unplug primary master IDE devices
@@ -7292,21 +8426,22 @@
the unplug protocol
never -- do not unplug even if version check succeeds
- xen_legacy_crash [X86,XEN]
+ xen_legacy_crash [X86,XEN,EARLY]
Crash from Xen panic notifier, without executing late
panic() code such as dumping handler.
- xen_msr_safe= [X86,XEN]
+ xen_mc_debug [X86,XEN,EARLY]
+ Enable multicall debugging when running as a Xen PV guest.
+ Enabling this feature will reduce performance a little
+ bit, so it should only be enabled for obtaining extended
+ debug data in case of multicall errors.
+
+ xen_msr_safe= [X86,XEN,EARLY]
Format: <bool>
Select whether to always use non-faulting (safe) MSR
access functions when running as Xen PV guest. The
default value is controlled by CONFIG_XEN_PV_MSR_SAFE.
- xen_nopvspin [X86,XEN]
- Disables the qspinlock slowpath using Xen PV optimizations.
- This parameter is obsoleted by "nopvspin" parameter, which
- has equivalent effect for XEN platform.
-
xen_nopv [X86]
Disables the PV optimizations forcing the HVM guest to
run as generic HVM guest with no PV drivers.
@@ -7314,7 +8449,7 @@
has equivalent effect for XEN platform.
xen_no_vector_callback
- [KNL,X86,XEN] Disable the vector callback for Xen
+ [KNL,X86,XEN,EARLY] Disable the vector callback for Xen
event channel interrupts.
xen_scrub_pages= [XEN]
@@ -7323,7 +8458,7 @@
with /sys/devices/system/xen_memory/xen_memory0/scrub_pages.
Default value controlled with CONFIG_XEN_SCRUB_PAGES_DEFAULT.
- xen_timer_slop= [X86-64,XEN]
+ xen_timer_slop= [X86-64,XEN,EARLY]
Set the timer slop (in nanoseconds) for the virtual Xen
timers (default is 100000). This adjusts the minimum
delta of virtualized Xen timers, where lower values
@@ -7376,7 +8511,7 @@
host controller quirks. Meaning of each bit can be
consulted in header drivers/usb/host/xhci.h.
- xmon [PPC]
+ xmon [PPC,EARLY]
Format: { early | on | rw | ro | off }
Controls if xmon debugger is enabled. Default is off.
Passing only "xmon" is equivalent to "xmon=early".
@@ -7394,4 +8529,3 @@
memory, and other data can't be written using
xmon commands.
off xmon is disabled.
-
diff --git a/Documentation/admin-guide/kernel-per-CPU-kthreads.rst b/Documentation/admin-guide/kernel-per-CPU-kthreads.rst
index b6aeae3327ce..ee9a6c94f383 100644
--- a/Documentation/admin-guide/kernel-per-CPU-kthreads.rst
+++ b/Documentation/admin-guide/kernel-per-CPU-kthreads.rst
@@ -278,12 +278,7 @@ To reduce its OS jitter, do any of the following:
due to the rtas_event_scan() function.
WARNING: Please check your CPU specifications to
make sure that this is safe on your particular system.
- e. If running on Cell Processor, build your kernel with
- CBE_CPUFREQ_SPU_GOVERNOR=n to avoid OS jitter from
- spu_gov_work().
- WARNING: Please check your CPU specifications to
- make sure that this is safe on your particular system.
- f. If running on PowerMAC, build your kernel with
+ e. If running on PowerMAC, build your kernel with
CONFIG_PMAC_RACKMETER=n to disable the CPU-meter,
avoiding OS jitter from rackmeter_do_timer().
@@ -315,7 +310,7 @@ To reduce its OS jitter, do at least one of the following:
to do.
Name:
- rcuop/%d and rcuos/%d
+ rcuop/%d, rcuos/%d, and rcuog/%d
Purpose:
Offload RCU callbacks from the corresponding CPU.
diff --git a/Documentation/admin-guide/laptops/alienware-wmi.rst b/Documentation/admin-guide/laptops/alienware-wmi.rst
new file mode 100644
index 000000000000..27a32a8057da
--- /dev/null
+++ b/Documentation/admin-guide/laptops/alienware-wmi.rst
@@ -0,0 +1,127 @@
+.. SPDX-License-Identifier: GPL-2.0-or-later
+
+====================
+Alienware WMI Driver
+====================
+
+Kurt Borja <kuurtb@gmail.com>
+
+This is a driver for the "WMAX" WMI device, which is found in most Dell gaming
+laptops and controls various special features.
+
+Before the launch of M-Series laptops (~2018), the "WMAX" device controlled
+basic RGB lighting, deep sleep mode, HDMI mode and amplifier status.
+
+Later, this device was completely repurpused. Now it mostly deals with thermal
+profiles, sensor monitoring and overclocking. This interface is named "AWCC" and
+is known to be used by the AWCC OEM application to control these features.
+
+The alienware-wmi driver controls both interfaces.
+
+AWCC Interface
+==============
+
+WMI device documentation: Documentation/wmi/devices/alienware-wmi.rst
+
+Supported devices
+-----------------
+
+- Alienware M-Series laptops
+- Alienware X-Series laptops
+- Alienware Aurora Desktops
+- Dell G-Series laptops
+
+If you believe your device supports the AWCC interface and you don't have any of
+the features described in this document, try the following alienware-wmi module
+parameters:
+
+- ``force_platform_profile=1``: Forces probing for platform profile support
+- ``force_hwmon=1``: Forces probing for HWMON support
+
+If the module loads successfully with these parameters, consider submitting a
+patch adding your model to the ``awcc_dmi_table`` located in
+``drivers/platform/x86/dell/alienware-wmi-wmax.c`` or contacting the maintainer
+for further guidance.
+
+Status
+------
+
+The following features are currently supported:
+
+- :ref:`Platform Profile <platform-profile>`:
+
+ - Thermal profile control
+
+ - G-Mode toggling
+
+- :ref:`HWMON <hwmon>`:
+
+ - Sensor monitoring
+
+ - Manual fan control
+
+.. _platform-profile:
+
+Platform Profile
+----------------
+
+The AWCC interface exposes various firmware defined thermal profiles. These are
+exposed to user-space through the Platform Profile class interface. Refer to
+:ref:`sysfs-class-platform-profile <abi_file_testing_sysfs_class_platform_profile>`
+for more information.
+
+The name of the platform-profile class device exported by this driver is
+"alienware-wmi" and it's path can be found with:
+
+::
+
+ grep -l "alienware-wmi" /sys/class/platform-profile/platform-profile-*/name | sed 's|/[^/]*$||'
+
+If the device supports G-Mode, it is also toggled when selecting the
+``performance`` profile.
+
+.. note::
+ You may set the ``force_gmode`` module parameter to always try to toggle this
+ feature, without checking if your model supports it.
+
+.. _hwmon:
+
+HWMON
+-----
+
+The AWCC interface also supports sensor monitoring and manual fan control. Both
+of these features are exposed to user-space through the HWMON interface.
+
+The name of the hwmon class device exported by this driver is "alienware_wmi"
+and it's path can be found with:
+
+::
+
+ grep -l "alienware_wmi" /sys/class/hwmon/hwmon*/name | sed 's|/[^/]*$||'
+
+Sensor monitoring is done through the standard HWMON interface. Refer to
+:ref:`sysfs-class-hwmon <abi_file_testing_sysfs_class_hwmon>` for more
+information.
+
+Manual fan control on the other hand, is not exposed directly by the AWCC
+interface. Instead it let's us control a fan `boost` value. This `boost` value
+has the following aproximate behavior over the fan pwm:
+
+::
+
+ pwm = pwm_base + (fan_boost / 255) * (pwm_max - pwm_base)
+
+Due to the above behavior, the fan `boost` control is exposed to user-space
+through the following, custom hwmon sysfs attribute:
+
+=============================== ======= =======================================
+Name Perm Description
+=============================== ======= =======================================
+fan[1-4]_boost RW Fan boost value.
+
+ Integer value between 0 and 255
+=============================== ======= =======================================
+
+.. note::
+ In some devices, manual fan control only works reliably if the ``custom``
+ platform profile is selected.
diff --git a/Documentation/admin-guide/laptops/index.rst b/Documentation/admin-guide/laptops/index.rst
index cd9a1c2695fd..6432c251dc95 100644
--- a/Documentation/admin-guide/laptops/index.rst
+++ b/Documentation/admin-guide/laptops/index.rst
@@ -7,11 +7,14 @@ Laptop Drivers
.. toctree::
:maxdepth: 1
+ alienware-wmi
asus-laptop
disk-shock-protection
laptop-mode
lg-laptop
+ samsung-galaxybook
sony-laptop
sonypi
thinkpad-acpi
toshiba_haps
+ uniwill-laptop
diff --git a/Documentation/admin-guide/laptops/laptop-mode.rst b/Documentation/admin-guide/laptops/laptop-mode.rst
index b61cc601d298..66eb9cd918b5 100644
--- a/Documentation/admin-guide/laptops/laptop-mode.rst
+++ b/Documentation/admin-guide/laptops/laptop-mode.rst
@@ -61,7 +61,7 @@ Caveats
Check your drive's rating, and don't wear down your drive's lifetime if you
don't need to.
-* If you mount some of your ext3/reiserfs filesystems with the -n option, then
+* If you mount some of your ext3 filesystems with the -n option, then
the control script will not be able to remount them correctly. You must set
DO_REMOUNTS=0 in the control script, otherwise it will remount them with the
wrong options -- or it will fail because it cannot write to /etc/mtab.
@@ -96,7 +96,7 @@ control script increases dirty_expire_centisecs and dirty_writeback_centisecs in
dirtied are not forced to be written to disk as often. The control script also
changes the dirty background ratio, so that background writeback of dirty pages
is not done anymore. Combined with a higher commit value (also 10 minutes) for
-ext3 or ReiserFS filesystems (also done automatically by the control script),
+ext3 filesystem (also done automatically by the control script),
this results in concentration of disk activity in a small time interval which
occurs only once every 10 minutes, or whenever the disk is forced to spin up by
a cache miss. The disk can then be spun down in the periods of inactivity.
@@ -587,7 +587,7 @@ Control script::
FST=$(deduce_fstype $MP)
fi
case "$FST" in
- "ext3"|"reiserfs")
+ "ext3")
PARSEDOPTS="$(parse_mount_opts commit "$OPTS")"
mount $DEV -t $FST $MP -o remount,$PARSEDOPTS,commit=$MAX_AGE$NOATIME_OPT
;;
@@ -647,7 +647,7 @@ Control script::
FST=$(deduce_fstype $MP)
fi
case "$FST" in
- "ext3"|"reiserfs")
+ "ext3")
PARSEDOPTS="$(parse_mount_opts_wfstab $DEV commit $OPTS)"
PARSEDOPTS="$(parse_yesno_opts_wfstab $DEV atime atime $PARSEDOPTS)"
mount $DEV -t $FST $MP -o remount,$PARSEDOPTS
diff --git a/Documentation/admin-guide/laptops/lg-laptop.rst b/Documentation/admin-guide/laptops/lg-laptop.rst
index 67fd6932cef4..c4dd534f91ed 100644
--- a/Documentation/admin-guide/laptops/lg-laptop.rst
+++ b/Documentation/admin-guide/laptops/lg-laptop.rst
@@ -48,8 +48,8 @@ This value is reset to 100 when the kernel boots.
Fan mode
--------
-Writing 1/0 to /sys/devices/platform/lg-laptop/fan_mode disables/enables
-the fan silent mode.
+Writing 0/1/2 to /sys/devices/platform/lg-laptop/fan_mode sets fan mode to
+Optimal/Silent/Performance respectively.
USB charge
diff --git a/Documentation/admin-guide/laptops/samsung-galaxybook.rst b/Documentation/admin-guide/laptops/samsung-galaxybook.rst
new file mode 100644
index 000000000000..752b8f1a4a74
--- /dev/null
+++ b/Documentation/admin-guide/laptops/samsung-galaxybook.rst
@@ -0,0 +1,174 @@
+.. SPDX-License-Identifier: GPL-2.0-or-later
+
+==========================
+Samsung Galaxy Book Driver
+==========================
+
+Joshua Grisham <josh@joshuagrisham.com>
+
+This is a Linux x86 platform driver for Samsung Galaxy Book series notebook
+devices which utilizes Samsung's ``SCAI`` ACPI device in order to control
+extra features and receive various notifications.
+
+Supported devices
+=================
+
+Any device with one of the supported ACPI device IDs should be supported. This
+covers most of the "Samsung Galaxy Book" series notebooks that are currently
+available as of this writing, and could include other Samsung notebook devices
+as well.
+
+Status
+======
+
+The following features are currently supported:
+
+- :ref:`Keyboard backlight <keyboard-backlight>` control
+- :ref:`Performance mode <performance-mode>` control implemented using the
+ platform profile interface
+- :ref:`Battery charge control end threshold
+ <battery-charge-control-end-threshold>` (stop charging battery at given
+ percentage value) implemented as a battery hook
+- :ref:`Firmware Attributes <firmware-attributes>` to allow control of various
+ device settings
+- :ref:`Handling of Fn hotkeys <keyboard-hotkey-actions>` for various actions
+- :ref:`Handling of ACPI notifications and hotkeys
+ <acpi-notifications-and-hotkey-actions>`
+
+Because different models of these devices can vary in their features, there is
+logic built within the driver which attempts to test each implemented feature
+for a valid response before enabling its support (registering additional devices
+or extensions, adding sysfs attributes, etc). Therefore, it can be important to
+note that not all features may be supported for your particular device.
+
+The following features might be possible to implement but will require
+additional investigation and are therefore not supported at this time:
+
+- "Dolby Atmos" mode for the speakers
+- "Outdoor Mode" for increasing screen brightness on models with ``SAM0427``
+- "Silent Mode" on models with ``SAM0427``
+
+.. _keyboard-backlight:
+
+Keyboard backlight
+==================
+
+A new LED class named ``samsung-galaxybook::kbd_backlight`` is created which
+will then expose the device using the standard sysfs-based LED interface at
+``/sys/class/leds/samsung-galaxybook::kbd_backlight``. Brightness can be
+controlled by writing the desired value to the ``brightness`` sysfs attribute or
+with any other desired userspace utility.
+
+.. note::
+ Most of these devices have an ambient light sensor which also turns
+ off the keyboard backlight under well-lit conditions. This behavior does not
+ seem possible to control at this time, but can be good to be aware of.
+
+.. _performance-mode:
+
+Performance mode
+================
+
+This driver implements the
+Documentation/userspace-api/sysfs-platform_profile.rst interface for working
+with the "performance mode" function of the Samsung ACPI device.
+
+Mapping of each Samsung "performance mode" to its respective platform profile is
+performed dynamically by the driver, as not all models support all of the same
+performance modes. Your device might have one or more of the following mappings:
+
+- "Silent" maps to ``low-power``
+- "Quiet" maps to ``quiet``
+- "Optimized" maps to ``balanced``
+- "High performance" maps to ``performance``
+
+The result of the mapping can be printed in the kernel log when the module is
+loaded. Supported profiles can also be retrieved from
+``/sys/firmware/acpi/platform_profile_choices``, while
+``/sys/firmware/acpi/platform_profile`` can be used to read or write the
+currently selected profile.
+
+The ``balanced`` platform profile will be set during module load if no profile
+has been previously set.
+
+.. _battery-charge-control-end-threshold:
+
+Battery charge control end threshold
+====================================
+
+This platform driver will add the ability to set the battery's charge control
+end threshold, but does not have the ability to set a start threshold.
+
+This feature is typically called "Battery Saver" by the various Samsung
+applications in Windows, but in Linux we have implemented the standardized
+"charge control threshold" sysfs interface on the battery device to allow for
+controlling this functionality from the userspace.
+
+The sysfs attribute
+``/sys/class/power_supply/BAT1/charge_control_end_threshold`` can be used to
+read or set the desired charge end threshold.
+
+If you wish to maintain interoperability with the Samsung Settings application
+in Windows, then you should set the value to 100 to represent "off", or enable
+the feature using only one of the following values: 50, 60, 70, 80, or 90.
+Otherwise, the driver will accept any value between 1 and 100 as the percentage
+that you wish the battery to stop charging at.
+
+.. note::
+ Some devices have been observed as automatically "turning off" the charge
+ control end threshold if an input value of less than 30 is given.
+
+.. _firmware-attributes:
+
+Firmware Attributes
+===================
+
+The following enumeration-typed firmware attributes are set up by this driver
+and should be accessible under
+``/sys/class/firmware-attributes/samsung-galaxybook/attributes/`` if your device
+supports them:
+
+- ``power_on_lid_open`` (device should power on when the lid is opened)
+- ``usb_charging`` (USB ports can deliver power to connected devices even when
+ the device is powered off or in a low sleep state)
+- ``block_recording`` (blocks access to camera and microphone)
+
+All of these attributes are simple boolean-like enumeration values which use 0
+to represent "off" and 1 to represent "on". Use the ``current_value`` attribute
+to get or change the setting on the device.
+
+Note that when ``block_recording`` is updated, the input device "Samsung Galaxy
+Book Lens Cover" will receive a ``SW_CAMERA_LENS_COVER`` switch event which
+reflects the current state.
+
+.. _keyboard-hotkey-actions:
+
+Keyboard hotkey actions (i8042 filter)
+======================================
+
+The i8042 filter will swallow the keyboard events for the Fn+F9 hotkey (Multi-
+level keyboard backlight toggle) and Fn+F10 hotkey (Block recording toggle)
+and instead execute their actions within the driver itself.
+
+Fn+F9 will cycle through the brightness levels of the keyboard backlight. A
+notification will be sent using ``led_classdev_notify_brightness_hw_changed``
+so that the userspace can be aware of the change. This mimics the behavior of
+other existing devices where the brightness level is cycled internally by the
+embedded controller and then reported via a notification.
+
+Fn+F10 will toggle the value of the "block recording" setting, which blocks
+or allows usage of the built-in camera and microphone (and generates the same
+Lens Cover switch event mentioned above).
+
+.. _acpi-notifications-and-hotkey-actions:
+
+ACPI notifications and hotkey actions
+=====================================
+
+ACPI notifications will generate ACPI netlink events under the device class
+``samsung-galaxybook`` and bus ID matching the Samsung ACPI device ID found on
+your device. The events can be received using userspace tools such as
+``acpi_listen`` and ``acpid``.
+
+The Fn+F11 Performance mode hotkey will be handled by the driver; each keypress
+will cycle to the next available platform profile.
diff --git a/Documentation/admin-guide/laptops/sonypi.rst b/Documentation/admin-guide/laptops/sonypi.rst
index 190da1234314..7541f56e0007 100644
--- a/Documentation/admin-guide/laptops/sonypi.rst
+++ b/Documentation/admin-guide/laptops/sonypi.rst
@@ -25,7 +25,7 @@ generate, like:
(when available)
Those events (see linux/sonypi.h) can be polled using the character device node
-/dev/sonypi (major 10, minor auto allocated or specified as a option).
+/dev/sonypi (major 10, minor auto allocated or specified as an option).
A simple daemon which translates the jogdial movements into mouse wheel events
can be downloaded at: <http://popies.net/sonypi/>
diff --git a/Documentation/admin-guide/laptops/thinkpad-acpi.rst b/Documentation/admin-guide/laptops/thinkpad-acpi.rst
index 98d304010170..4ab0fef7d440 100644
--- a/Documentation/admin-guide/laptops/thinkpad-acpi.rst
+++ b/Documentation/admin-guide/laptops/thinkpad-acpi.rst
@@ -444,7 +444,11 @@ event code Key Notes
0x1008 0x07 FN+F8 IBM: toggle screen expand
Lenovo: configure UltraNav,
- or toggle screen expand
+ or toggle screen expand.
+ On 2024 platforms replaced by
+ 0x131f (see below) and on newer
+ platforms (2025 +) keycode is
+ replaced by 0x1401 (see below).
0x1009 0x08 FN+F9 -
@@ -504,6 +508,11 @@ event code Key Notes
0x1019 0x18 unknown
+0x131f ... FN+F8 Platform Mode change (2024 systems).
+ Implemented in driver.
+
+0x1401 ... FN+F8 Platform Mode change (2025 + systems).
+ Implemented in driver.
... ... ...
0x1020 0x1F unknown
diff --git a/Documentation/admin-guide/laptops/uniwill-laptop.rst b/Documentation/admin-guide/laptops/uniwill-laptop.rst
new file mode 100644
index 000000000000..a16baf15516b
--- /dev/null
+++ b/Documentation/admin-guide/laptops/uniwill-laptop.rst
@@ -0,0 +1,60 @@
+.. SPDX-License-Identifier: GPL-2.0+
+
+Uniwill laptop extra features
+=============================
+
+On laptops manufactured by Uniwill (either directly or as ODM), the ``uniwill-laptop`` driver
+handles various platform-specific features.
+
+Module Loading
+--------------
+
+The ``uniwill-laptop`` driver relies on a DMI table to automatically load on supported devices.
+When using the ``force`` module parameter, this DMI check will be omitted, allowing the driver
+to be loaded on unsupported devices for testing purposes.
+
+Hotkeys
+-------
+
+Usually the FN keys work without a special driver. However as soon as the ``uniwill-laptop`` driver
+is loaded, the FN keys need to be handled manually. This is done automatically by the driver itself.
+
+Keyboard settings
+-----------------
+
+The ``uniwill-laptop`` driver allows the user to enable/disable:
+
+ - the FN and super key lock functionality of the integrated keyboard
+ - the touchpad toggle functionality of the integrated touchpad
+
+See Documentation/ABI/testing/sysfs-driver-uniwill-laptop for details.
+
+Hwmon interface
+---------------
+
+The ``uniwill-laptop`` driver supports reading of the CPU and GPU temperature and supports up to
+two fans. Userspace applications can access sensor readings over the hwmon sysfs interface.
+
+Platform profile
+----------------
+
+Support for changing the platform performance mode is currently not implemented.
+
+Battery Charging Control
+------------------------
+
+The ``uniwill-laptop`` driver supports controlling the battery charge limit. This happens over
+the standard ``charge_control_end_threshold`` power supply sysfs attribute. All values
+between 1 and 100 percent are supported.
+
+Additionally the driver signals the presence of battery charging issues through the standard
+``health`` power supply sysfs attribute.
+
+Lightbar
+--------
+
+The ``uniwill-laptop`` driver exposes the lightbar found on some models as a standard multicolor
+LED class device. The default name of this LED class device is ``uniwill:multicolor:status``.
+
+See Documentation/ABI/testing/sysfs-driver-uniwill-laptop for details on how to control the various
+animation modes of the lightbar.
diff --git a/Documentation/admin-guide/md.rst b/Documentation/admin-guide/md.rst
index 4ff2cc291d18..dc7eab191caa 100644
--- a/Documentation/admin-guide/md.rst
+++ b/Documentation/admin-guide/md.rst
@@ -238,6 +238,16 @@ All md devices contain:
the number of devices in a raid4/5/6, or to support external
metadata formats which mandate such clipping.
+ logical_block_size
+ Configure the array's logical block size in bytes. This attribute
+ is only supported for 1.x meta. Write the value before starting
+ array. The final array LBS uses the maximum between this
+ configuration and LBS of all combined devices. Note that
+ LBS cannot exceed PAGE_SIZE before RAID supports folio.
+ WARNING: Arrays created on new kernel cannot be assembled at old
+ kernel due to padding check, Set module parameter 'check_new_feature'
+ to false to bypass, but data loss may occur.
+
reshape_position
This is either ``none`` or a sector number within the devices of
the array where ``reshape`` is up to. If this is set, the three
@@ -347,6 +357,54 @@ All md devices contain:
active-idle
like active, but no writes have been seen for a while (safe_mode_delay).
+ consistency_policy
+ This indicates how the array maintains consistency in case of unexpected
+ shutdown. It can be:
+
+ none
+ Array has no redundancy information, e.g. raid0, linear.
+
+ resync
+ Full resync is performed and all redundancy is regenerated when the
+ array is started after unclean shutdown.
+
+ bitmap
+ Resync assisted by a write-intent bitmap.
+
+ journal
+ For raid4/5/6, journal device is used to log transactions and replay
+ after unclean shutdown.
+
+ ppl
+ For raid5 only, Partial Parity Log is used to close the write hole and
+ eliminate resync.
+
+ The accepted values when writing to this file are ``ppl`` and ``resync``,
+ used to enable and disable PPL.
+
+ uuid
+ This indicates the UUID of the array in the following format:
+ xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
+
+ bitmap_type
+ [RW] When read, this file will display the current and available
+ bitmap for this array. The currently active bitmap will be enclosed
+ in [] brackets. Writing an bitmap name or ID to this file will switch
+ control of this array to that new bitmap. Note that writing a new
+ bitmap for created array is forbidden.
+
+ none
+ No bitmap
+ bitmap
+ The default internal bitmap
+ llbitmap
+ The lockless internal bitmap
+
+If bitmap_type is not none, then additional bitmap attributes bitmap/xxx or
+llbitmap/xxx will be created after md device KOBJ_CHANGE event.
+
+If bitmap_type is bitmap, then the md device will also contain:
+
bitmap/location
This indicates where the write-intent bitmap for the array is
stored.
@@ -401,35 +459,23 @@ All md devices contain:
once the array becomes non-degraded, and this fact has been
recorded in the metadata.
- consistency_policy
- This indicates how the array maintains consistency in case of unexpected
- shutdown. It can be:
-
- none
- Array has no redundancy information, e.g. raid0, linear.
-
- resync
- Full resync is performed and all redundancy is regenerated when the
- array is started after unclean shutdown.
-
- bitmap
- Resync assisted by a write-intent bitmap.
+If bitmap_type is llbitmap, then the md device will also contain:
- journal
- For raid4/5/6, journal device is used to log transactions and replay
- after unclean shutdown.
+ llbitmap/bits
+ This is read-only, show status of bitmap bits, the number of each
+ value.
- ppl
- For raid5 only, Partial Parity Log is used to close the write hole and
- eliminate resync.
-
- The accepted values when writing to this file are ``ppl`` and ``resync``,
- used to enable and disable PPL.
+ llbitmap/metadata
+ This is read-only, show bitmap metadata, include chunksize, chunkshift,
+ chunks, offset and daemon_sleep.
- uuid
- This indicates the UUID of the array in the following format:
- xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
+ llbitmap/daemon_sleep
+ This is read-write, time in seconds that daemon function will be
+ triggered to clear dirty bits.
+ llbitmap/barrier_idle
+ This is read-write, time in seconds that page barrier will be idled,
+ means dirty bits in the page will be cleared.
As component devices are added to an md array, they appear in the ``md``
directory as new directories named::
@@ -758,7 +804,7 @@ These currently include:
journal_mode (currently raid5 only)
The cache mode for raid5. raid5 could include an extra disk for
- caching. The mode can be "write-throuth" and "write-back". The
+ caching. The mode can be "write-through" or "write-back". The
default is "write-through".
ppl_write_hint
diff --git a/Documentation/admin-guide/media/building.rst b/Documentation/admin-guide/media/building.rst
index a06473429916..7a413ba07f93 100644
--- a/Documentation/admin-guide/media/building.rst
+++ b/Documentation/admin-guide/media/building.rst
@@ -15,7 +15,7 @@ Please notice, however, that, if:
you should use the main media development tree ``master`` branch:
- https://git.linuxtv.org/media_tree.git/
+ https://git.linuxtv.org/media.git/
In this case, you may find some useful information at the
`LinuxTv wiki pages <https://linuxtv.org/wiki>`_:
diff --git a/Documentation/admin-guide/media/c3-isp.dot b/Documentation/admin-guide/media/c3-isp.dot
new file mode 100644
index 000000000000..42dc931ee84a
--- /dev/null
+++ b/Documentation/admin-guide/media/c3-isp.dot
@@ -0,0 +1,26 @@
+digraph board {
+ rankdir=TB
+ n00000001 [label="{{<port0> 0 | <port1> 1} | c3-isp-core\n/dev/v4l-subdev0 | {<port2> 2 | <port3> 3 | <port4> 4 | <port5> 5}}", shape=Mrecord, style=filled, fillcolor=green]
+ n00000001:port3 -> n00000008:port0
+ n00000001:port4 -> n0000000b:port0
+ n00000001:port5 -> n0000000e:port0
+ n00000001:port2 -> n00000027
+ n00000008 [label="{{<port0> 0} | c3-isp-resizer0\n/dev/v4l-subdev1 | {<port1> 1}}", shape=Mrecord, style=filled, fillcolor=green]
+ n00000008:port1 -> n00000016 [style=bold]
+ n0000000b [label="{{<port0> 0} | c3-isp-resizer1\n/dev/v4l-subdev2 | {<port1> 1}}", shape=Mrecord, style=filled, fillcolor=green]
+ n0000000b:port1 -> n0000001a [style=bold]
+ n0000000e [label="{{<port0> 0} | c3-isp-resizer2\n/dev/v4l-subdev3 | {<port1> 1}}", shape=Mrecord, style=filled, fillcolor=green]
+ n0000000e:port1 -> n00000023 [style=bold]
+ n00000011 [label="{{<port0> 0} | c3-mipi-adapter\n/dev/v4l-subdev4 | {<port1> 1}}", shape=Mrecord, style=filled, fillcolor=green]
+ n00000011:port1 -> n00000001:port0 [style=bold]
+ n00000016 [label="c3-isp-cap0\n/dev/video0", shape=box, style=filled, fillcolor=yellow]
+ n0000001a [label="c3-isp-cap1\n/dev/video1", shape=box, style=filled, fillcolor=yellow]
+ n0000001e [label="{{<port0> 0} | c3-mipi-csi2\n/dev/v4l-subdev5 | {<port1> 1}}", shape=Mrecord, style=filled, fillcolor=green]
+ n0000001e:port1 -> n00000011:port0 [style=bold]
+ n00000023 [label="c3-isp-cap2\n/dev/video2", shape=box, style=filled, fillcolor=yellow]
+ n00000027 [label="c3-isp-stats\n/dev/video3", shape=box, style=filled, fillcolor=yellow]
+ n0000002b [label="c3-isp-params\n/dev/video4", shape=box, style=filled, fillcolor=yellow]
+ n0000002b -> n00000001:port1
+ n0000003f [label="{{} | imx290 2-001a\n/dev/v4l-subdev6 | {<port0> 0}}", shape=Mrecord, style=filled, fillcolor=green]
+ n0000003f:port0 -> n0000001e:port0 [style=bold]
+}
diff --git a/Documentation/admin-guide/media/c3-isp.rst b/Documentation/admin-guide/media/c3-isp.rst
new file mode 100644
index 000000000000..ac508b8c6831
--- /dev/null
+++ b/Documentation/admin-guide/media/c3-isp.rst
@@ -0,0 +1,101 @@
+.. SPDX-License-Identifier: (GPL-2.0-only OR MIT)
+
+.. include:: <isonum.txt>
+
+=================================================
+Amlogic C3 Image Signal Processing (C3ISP) driver
+=================================================
+
+Introduction
+============
+
+This file documents the Amlogic C3ISP driver located under
+drivers/media/platform/amlogic/c3/isp.
+
+The current version of the driver supports the C3ISP found on
+Amlogic C308L processor.
+
+The driver implements V4L2, Media controller and V4L2 subdev interfaces.
+Camera sensor using V4L2 subdev interface in the kernel is supported.
+
+The driver has been tested on AW419-C308L-Socket platform.
+
+Amlogic C3 ISP
+==============
+
+The Camera hardware found on C308L processors and supported by
+the driver consists of:
+
+- 1 MIPI-CSI-2 module: handles the physical layer of the MIPI CSI-2 receiver and
+ receives data from the connected camera sensor.
+- 1 MIPI-ADAPTER module: organizes MIPI data to meet ISP input requirements and
+ send MIPI data to ISP.
+- 1 ISP (Image Signal Processing) module: contains a pipeline of image processing
+ hardware blocks. The ISP pipeline contains three resizers at the end each of
+ them connected to a DMA interface which writes the output data to memory.
+
+A high-level functional view of the C3 ISP is presented below.::
+
+ +----------+ +-------+
+ | Resizer |--->| WRMIF |
+ +---------+ +------------+ +--------------+ +-------+ |----------+ +-------+
+ | Sensor |--->| MIPI CSI-2 |--->| MIPI ADAPTER |--->| ISP |---|----------+ +-------+
+ +---------+ +------------+ +--------------+ +-------+ | Resizer |--->| WRMIF |
+ +----------+ +-------+
+ |----------+ +-------+
+ | Resizer |--->| WRMIF |
+ +----------+ +-------+
+
+Driver architecture and design
+==============================
+
+With the goal to model the hardware links between the modules and to expose a
+clean, logical and usable interface, the driver registers the following V4L2
+sub-devices:
+
+- 1 `c3-mipi-csi2` sub-device - the MIPI CSI-2 receiver
+- 1 `c3-mipi-adapter` sub-device - the MIPI adapter
+- 1 `c3-isp-core` sub-device - the ISP core
+- 3 `c3-isp-resizer` sub-devices - the ISP resizers
+
+The `c3-isp-core` sub-device is linked to 2 video device nodes for statistics
+capture and parameters programming:
+
+- the `c3-isp-stats` capture video device node for statistics capture
+- the `c3-isp-params` output video device for parameters programming
+
+Each `c3-isp-resizer` sub-device is linked to a capture video device node where
+frames are captured from:
+
+- `c3-isp-resizer0` is linked to the `c3-isp-cap0` capture video device
+- `c3-isp-resizer1` is linked to the `c3-isp-cap1` capture video device
+- `c3-isp-resizer2` is linked to the `c3-isp-cap2` capture video device
+
+The media controller pipeline graph is as follows (with connected a
+IMX290 camera sensor):
+
+.. _isp_topology_graph:
+
+.. kernel-figure:: c3-isp.dot
+ :alt: c3-isp.dot
+ :align: center
+
+ Media pipeline topology
+
+Implementation
+==============
+
+Runtime configuration of the ISP hardware is performed on the `c3-isp-params`
+video device node using the :ref:`V4L2_META_FMT_C3ISP_PARAMS
+<v4l2-meta-fmt-c3isp-params>` as data format. The buffer structure is defined by
+:c:type:`c3_isp_params_cfg`.
+
+Statistics are captured from the `c3-isp-stats` video device node using the
+:ref:`V4L2_META_FMT_C3ISP_STATS <v4l2-meta-fmt-c3isp-stats>` data format.
+
+The final picture size and format is configured using the V4L2 video
+capture interface on the `c3-isp-cap[0, 2]` video device nodes.
+
+The Amlogic C3 ISP is supported by `libcamera <https://libcamera.org>`_ with a
+dedicated pipeline handler and algorithms that perform run-time image correction
+and enhancement.
diff --git a/Documentation/admin-guide/media/cec.rst b/Documentation/admin-guide/media/cec.rst
index 6b30e355cf23..b2e7a300494a 100644
--- a/Documentation/admin-guide/media/cec.rst
+++ b/Documentation/admin-guide/media/cec.rst
@@ -42,10 +42,14 @@ dongles):
``persistent_config``: by default this is off, but when set to 1 the driver
will store the current settings to the device's internal eeprom and restore
it the next time the device is connected to the USB port.
+
- RainShadow Tech. Note: this driver does not support the persistent_config
module option of the Pulse-Eight driver. The hardware supports it, but I
have no plans to add this feature. But I accept patches :-)
+- Extron DA HD 4K PLUS HDMI Distribution Amplifier. See
+ :ref:`extron_da_hd_4k_plus` for more information.
+
Miscellaneous:
- vivid: emulates a CEC receiver and CEC transmitter.
@@ -378,3 +382,86 @@ it later using ``--analyze-pin``.
You can also use this as a full-fledged CEC device by configuring it
using ``cec-ctl --tv -p0.0.0.0`` or ``cec-ctl --playback -p1.0.0.0``.
+
+.. _extron_da_hd_4k_plus:
+
+Extron DA HD 4K PLUS CEC Adapter driver
+=======================================
+
+This driver is for the Extron DA HD 4K PLUS series of HDMI Distribution
+Amplifiers: https://www.extron.com/product/dahd4kplusseries
+
+The 2, 4 and 6 port models are supported.
+
+Firmware version 1.02.0001 or higher is required.
+
+Note that older Extron hardware revisions have a problem with the CEC voltage,
+which may mean that CEC will not work. This is fixed in hardware revisions
+E34814 and up.
+
+The CEC support has two modes: the first is a manual mode where userspace has
+to manually control CEC for the HDMI Input and all HDMI Outputs. While this gives
+full control, it is also complicated.
+
+The second mode is an automatic mode, which is selected if the module option
+``vendor_id`` is set. In that case the driver controls CEC and CEC messages
+received in the input will be distributed to the outputs. It is still possible
+to use the /dev/cecX devices to talk to the connected devices directly, but it is
+the driver that configures everything and deals with things like Hotplug Detect
+changes.
+
+The driver also takes care of the EDIDs: /dev/videoX devices are created to
+read the EDIDs and (for the HDMI Input port) to set the EDID.
+
+By default userspace is responsible to set the EDID for the HDMI Input
+according to the EDIDs of the connected displays. But if the ``manufacturer_name``
+module option is set, then the driver will take care of setting the EDID
+of the HDMI Input based on the supported resolutions of the connected displays.
+Currently the driver only supports resolutions 1080p60 and 4kp60: if all connected
+displays support 4kp60, then it will advertise 4kp60 on the HDMI input, otherwise
+it will fall back to an EDID that just reports 1080p60.
+
+The status of the Extron is reported in ``/sys/kernel/debug/cec/cecX/status``.
+
+The extron-da-hd-4k-plus driver implements the following module options:
+
+``debug``
+---------
+
+If set to 1, then all serial port traffic is shown.
+
+``vendor_id``
+-------------
+
+The CEC Vendor ID to report to connected displays.
+
+If set, then the driver will take care of distributing CEC messages received
+on the input to the HDMI outputs. This is done for the following CEC messages:
+
+- <Standby>
+- <Image View On> and <Text View On>
+- <Give Device Power Status>
+- <Set System Audio Mode>
+- <Request Current Latency>
+
+If not set, then userspace is responsible for this, and it will have to
+configure the CEC devices for HDMI Input and the HDMI Outputs manually.
+
+``manufacturer_name``
+---------------------
+
+A three character manufacturer name that is used in the EDID for the HDMI
+Input. If not set, then userspace is responsible for configuring an EDID.
+If set, then the driver will update the EDID automatically based on the
+resolutions supported by the connected displays, and it will not be possible
+anymore to manually set the EDID for the HDMI Input.
+
+``hpd_never_low``
+-----------------
+
+If set, then the Hotplug Detect pin of the HDMI Input will always be high,
+even if nothing is connected to the HDMI Outputs. If not set (the default)
+then the Hotplug Detect pin of the HDMI input will go low if all the detected
+Hotplug Detect pins of the HDMI Outputs are also low.
+
+This option may be changed dynamically.
diff --git a/Documentation/admin-guide/media/em28xx-cardlist.rst b/Documentation/admin-guide/media/em28xx-cardlist.rst
index ace65718ea22..7dac07986d91 100644
--- a/Documentation/admin-guide/media/em28xx-cardlist.rst
+++ b/Documentation/admin-guide/media/em28xx-cardlist.rst
@@ -438,3 +438,11 @@ EM28xx cards list
- MyGica iGrabber
- em2860
- 1f4d:1abe
+ * - 106
+ - Hauppauge USB QuadHD ATSC
+ - em28274
+ - 2040:846d
+ * - 107
+ - MyGica UTV3 Analog USB2.0 TV Box
+ - em2860
+ - eb1a:2860
diff --git a/Documentation/admin-guide/media/i2c-cardlist.rst b/Documentation/admin-guide/media/i2c-cardlist.rst
index 1825a0bb47bd..fff962558cd5 100644
--- a/Documentation/admin-guide/media/i2c-cardlist.rst
+++ b/Documentation/admin-guide/media/i2c-cardlist.rst
@@ -91,7 +91,6 @@ ov5647 OmniVision OV5647 sensor
ov5670 OmniVision OV5670 sensor
ov5675 OmniVision OV5675 sensor
ov5695 OmniVision OV5695 sensor
-ov6650 OmniVision OV6650 sensor
ov7251 OmniVision OV7251 sensor
ov7640 OmniVision OV7640 sensor
ov7670 OmniVision OV7670 sensor
diff --git a/Documentation/admin-guide/media/imx.rst b/Documentation/admin-guide/media/imx.rst
index b8fa70f854fd..bb68100d8acb 100644
--- a/Documentation/admin-guide/media/imx.rst
+++ b/Documentation/admin-guide/media/imx.rst
@@ -96,7 +96,7 @@ Some of the features of this driver include:
motion compensation modes: low, medium, and high motion. Pipelines are
defined that allow sending frames to the VDIC subdev directly from the
CSI. There is also support in the future for sending frames to the
- VDIC from memory buffers via a output/mem2mem devices.
+ VDIC from memory buffers via output/mem2mem devices.
- Includes a Frame Interval Monitor (FIM) that can correct vertical sync
problems with the ADV718x video decoders.
diff --git a/Documentation/admin-guide/media/index.rst b/Documentation/admin-guide/media/index.rst
index be7e0e4482ca..b11737ae6c04 100644
--- a/Documentation/admin-guide/media/index.rst
+++ b/Documentation/admin-guide/media/index.rst
@@ -20,6 +20,11 @@ Documentation/driver-api/media/index.rst
- for driver development information and Kernel APIs used by
media devices;
+Documentation/process/debugging/media_specific_debugging_guide.rst
+
+ - for advice about essential tools and techniques to debug drivers on this
+ subsystem
+
.. toctree::
:caption: Table of Contents
:maxdepth: 2
diff --git a/Documentation/admin-guide/media/ipu3.rst b/Documentation/admin-guide/media/ipu3.rst
index 83b3cd03b35c..9c190942932e 100644
--- a/Documentation/admin-guide/media/ipu3.rst
+++ b/Documentation/admin-guide/media/ipu3.rst
@@ -98,7 +98,7 @@ frames in packed raw Bayer format to IPU3 CSI2 receiver.
# and that ov5670 sensor is connected to i2c bus 10 with address 0x36
export SDEV=$(media-ctl -d $MDEV -e "ov5670 10-0036")
- # Establish the link for the media devices using media-ctl [#f3]_
+ # Establish the link for the media devices using media-ctl
media-ctl -d $MDEV -l "ov5670:0 -> ipu3-csi2 0:0[1]"
# Set the format for the media devices
@@ -589,12 +589,8 @@ preserved.
References
==========
-.. [#f5] drivers/staging/media/ipu3/include/uapi/intel-ipu3.h
-
.. [#f1] https://github.com/intel/nvt
.. [#f2] http://git.ideasonboard.org/yavta.git
-.. [#f3] http://git.ideasonboard.org/?p=media-ctl.git;a=summary
-
.. [#f4] ImgU limitation requires an additional 16x16 for all input resolutions
diff --git a/Documentation/admin-guide/media/ipu6-isys.rst b/Documentation/admin-guide/media/ipu6-isys.rst
new file mode 100644
index 000000000000..d05086824a74
--- /dev/null
+++ b/Documentation/admin-guide/media/ipu6-isys.rst
@@ -0,0 +1,161 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: <isonum.txt>
+
+========================================================
+Intel Image Processing Unit 6 (IPU6) Input System driver
+========================================================
+
+Copyright |copy| 2023--2024 Intel Corporation
+
+Introduction
+============
+
+This file documents the Intel IPU6 (6th generation Image Processing Unit)
+Input System (MIPI CSI2 receiver) drivers located under
+drivers/media/pci/intel/ipu6.
+
+The Intel IPU6 can be found in certain Intel SoCs but not in all SKUs:
+
+* Tiger Lake
+* Jasper Lake
+* Alder Lake
+* Raptor Lake
+* Meteor Lake
+
+Intel IPU6 is made up of two components - Input System (ISYS) and Processing
+System (PSYS).
+
+The Input System mainly works as MIPI CSI-2 receiver which receives and
+processes the image data from the sensors and outputs the frames to memory.
+
+There are 2 driver modules - intel-ipu6 and intel-ipu6-isys. intel-ipu6 is an
+IPU6 common driver which does PCI configuration, firmware loading and parsing,
+firmware authentication, DMA mapping and IPU-MMU (internal Memory mapping Unit)
+configuration. intel_ipu6_isys implements V4L2, Media Controller and V4L2
+sub-device interfaces. The IPU6 ISYS driver supports camera sensors connected
+to the IPU6 ISYS through V4L2 sub-device sensor drivers.
+
+.. Note:: See Documentation/driver-api/media/drivers/ipu6.rst for more
+ information about the IPU6 hardware.
+
+Input system driver
+===================
+
+The Input System driver mainly configures CSI-2 D-PHY, constructs the firmware
+stream configuration, sends commands to firmware, gets response from hardware
+and firmware and then returns buffers to user. The ISYS is represented as
+several V4L2 sub-devices as well as video nodes.
+
+.. kernel-figure:: ipu6_isys_graph.svg
+ :alt: ipu6 isys media graph with multiple streams support
+
+ IPU6 ISYS media graph with multiple streams support
+
+The graph has been produced using the following command:
+
+.. code-block:: none
+
+ fdp -Gsplines=true -Tsvg < dot > dot.svg
+
+Capturing frames with IPU6 ISYS
+-------------------------------
+
+IPU6 ISYS is used to capture frames from the camera sensors connected to the
+CSI2 ports. The supported input formats of ISYS are listed in table below:
+
+.. tabularcolumns:: |p{0.8cm}|p{4.0cm}|p{4.0cm}|
+
+.. flat-table::
+ :header-rows: 1
+
+ * - IPU6 ISYS supported input formats
+
+ * - RGB565, RGB888
+
+ * - UYVY8, YUYV8
+
+ * - RAW8, RAW10, RAW12
+
+.. _ipu6_isys_capture_examples:
+
+Examples
+~~~~~~~~
+
+Here is an example of IPU6 ISYS raw capture on Dell XPS 9315 laptop. On this
+machine, ov01a10 sensor is connected to IPU ISYS CSI-2 port 2, which can
+generate images at sBGGR10 with resolution 1280x800.
+
+Using the media controller APIs, we can configure ov01a10 sensor by
+media-ctl [#f1]_ and yavta [#f2]_ to transmit frames to IPU6 ISYS.
+
+.. code-block:: none
+
+ # Example 1 capture frame from ov01a10 camera sensor
+ # This example assumes /dev/media0 as the IPU ISYS media device
+ export MDEV=/dev/media0
+
+ # Establish the link for the media devices using media-ctl
+ media-ctl -d $MDEV -l "\"ov01a10 3-0036\":0 -> \"Intel IPU6 CSI2 2\":0[1]"
+
+ # Set the format for the media devices
+ media-ctl -d $MDEV -V "ov01a10:0 [fmt:SBGGR10/1280x800]"
+ media-ctl -d $MDEV -V "Intel IPU6 CSI2 2:0 [fmt:SBGGR10/1280x800]"
+ media-ctl -d $MDEV -V "Intel IPU6 CSI2 2:1 [fmt:SBGGR10/1280x800]"
+
+Once the media pipeline is configured, desired sensor specific settings
+(such as exposure and gain settings) can be set, using the yavta tool.
+
+e.g
+
+.. code-block:: none
+
+ # and that ov01a10 sensor is connected to i2c bus 3 with address 0x36
+ export SDEV=$(media-ctl -d $MDEV -e "ov01a10 3-0036")
+
+ yavta -w 0x009e0903 400 $SDEV
+ yavta -w 0x009e0913 1000 $SDEV
+ yavta -w 0x009e0911 2000 $SDEV
+
+Once the desired sensor settings are set, frame captures can be done as below.
+
+e.g
+
+.. code-block:: none
+
+ yavta --data-prefix -u -c10 -n5 -I -s 1280x800 --file=/tmp/frame-#.bin \
+ -f SBGGR10 $(media-ctl -d $MDEV -e "Intel IPU6 ISYS Capture 0")
+
+With the above command, 10 frames are captured at 1280x800 resolution with
+sBGGR10 format. The captured frames are available as /tmp/frame-#.bin files.
+
+Here is another example of IPU6 ISYS RAW and metadata capture from camera
+sensor ov2740 on Lenovo X1 Yoga laptop.
+
+.. code-block:: none
+
+ media-ctl -l "\"ov2740 14-0036\":0 -> \"Intel IPU6 CSI2 1\":0[1]"
+ media-ctl -l "\"Intel IPU6 CSI2 1\":1 -> \"Intel IPU6 ISYS Capture 0\":0[1]"
+ media-ctl -l "\"Intel IPU6 CSI2 1\":2 -> \"Intel IPU6 ISYS Capture 1\":0[1]"
+
+ # set routing
+ media-ctl -R "\"Intel IPU6 CSI2 1\" [0/0->1/0[1],0/1->2/1[1]]"
+
+ media-ctl -V "\"Intel IPU6 CSI2 1\":0/0 [fmt:SGRBG10/1932x1092]"
+ media-ctl -V "\"Intel IPU6 CSI2 1\":0/1 [fmt:GENERIC_8/97x1]"
+ media-ctl -V "\"Intel IPU6 CSI2 1\":1/0 [fmt:SGRBG10/1932x1092]"
+ media-ctl -V "\"Intel IPU6 CSI2 1\":2/1 [fmt:GENERIC_8/97x1]"
+
+ CAPTURE_DEV=$(media-ctl -e "Intel IPU6 ISYS Capture 0")
+ ./yavta --data-prefix -c100 -n5 -I -s1932x1092 --file=/tmp/frame-#.bin \
+ -f SGRBG10 ${CAPTURE_DEV}
+
+ CAPTURE_META=$(media-ctl -e "Intel IPU6 ISYS Capture 1")
+ ./yavta --data-prefix -c100 -n5 -I -s97x1 -B meta-capture \
+ --file=/tmp/meta-#.bin -f GENERIC_8 ${CAPTURE_META}
+
+References
+==========
+
+.. [#f1] https://git.ideasonboard.org/media-ctl.git
+.. [#f2] https://git.ideasonboard.org/yavta.git
diff --git a/Documentation/admin-guide/media/ipu6_isys_graph.svg b/Documentation/admin-guide/media/ipu6_isys_graph.svg
new file mode 100644
index 000000000000..c8539ef320d2
--- /dev/null
+++ b/Documentation/admin-guide/media/ipu6_isys_graph.svg
@@ -0,0 +1,548 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
+ "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+<!-- Generated by graphviz version 2.43.0 (0)
+ -->
+<!-- Title: board Pages: 1 -->
+<svg width="1703pt" height="1473pt"
+ viewBox="0.00 0.00 1703.00 1473.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 1469)">
+<title>board</title>
+<polygon fill="white" stroke="transparent" points="-4,4 -4,-1469 1699,-1469 1699,4 -4,4"/>
+<!-- n00000001 -->
+<g id="node1" class="node">
+<title>n00000001</title>
+<polygon fill="yellow" stroke="black" points="832.99,-750.08 629.99,-750.08 629.99,-712.08 832.99,-712.08 832.99,-750.08"/>
+<text text-anchor="middle" x="731.49" y="-734.88" font-family="Times,serif" font-size="14.00">Intel IPU6 ISYS Capture 0</text>
+<text text-anchor="middle" x="731.49" y="-719.88" font-family="Times,serif" font-size="14.00">/dev/video0</text>
+</g>
+<!-- n00000005 -->
+<g id="node2" class="node">
+<title>n00000005</title>
+<polygon fill="yellow" stroke="black" points="1396.59,-771.88 1193.59,-771.88 1193.59,-733.88 1396.59,-733.88 1396.59,-771.88"/>
+<text text-anchor="middle" x="1295.09" y="-756.68" font-family="Times,serif" font-size="14.00">Intel IPU6 ISYS Capture 1</text>
+<text text-anchor="middle" x="1295.09" y="-741.68" font-family="Times,serif" font-size="14.00">/dev/video1</text>
+</g>
+<!-- n00000009 -->
+<g id="node3" class="node">
+<title>n00000009</title>
+<polygon fill="yellow" stroke="black" points="1118.52,-690.47 915.52,-690.47 915.52,-652.47 1118.52,-652.47 1118.52,-690.47"/>
+<text text-anchor="middle" x="1017.02" y="-675.27" font-family="Times,serif" font-size="14.00">Intel IPU6 ISYS Capture 2</text>
+<text text-anchor="middle" x="1017.02" y="-660.27" font-family="Times,serif" font-size="14.00">/dev/video2</text>
+</g>
+<!-- n0000000d -->
+<g id="node4" class="node">
+<title>n0000000d</title>
+<polygon fill="yellow" stroke="black" points="1105.89,-838.84 902.89,-838.84 902.89,-800.84 1105.89,-800.84 1105.89,-838.84"/>
+<text text-anchor="middle" x="1004.39" y="-823.64" font-family="Times,serif" font-size="14.00">Intel IPU6 ISYS Capture 3</text>
+<text text-anchor="middle" x="1004.39" y="-808.64" font-family="Times,serif" font-size="14.00">/dev/video3</text>
+</g>
+<!-- n00000011 -->
+<g id="node5" class="node">
+<title>n00000011</title>
+<polygon fill="yellow" stroke="black" points="1279.22,-992.95 1076.22,-992.95 1076.22,-954.95 1279.22,-954.95 1279.22,-992.95"/>
+<text text-anchor="middle" x="1177.72" y="-977.75" font-family="Times,serif" font-size="14.00">Intel IPU6 ISYS Capture 4</text>
+<text text-anchor="middle" x="1177.72" y="-962.75" font-family="Times,serif" font-size="14.00">/dev/video4</text>
+</g>
+<!-- n00000015 -->
+<g id="node6" class="node">
+<title>n00000015</title>
+<polygon fill="yellow" stroke="black" points="939.18,-984.91 736.18,-984.91 736.18,-946.91 939.18,-946.91 939.18,-984.91"/>
+<text text-anchor="middle" x="837.68" y="-969.71" font-family="Times,serif" font-size="14.00">Intel IPU6 ISYS Capture 5</text>
+<text text-anchor="middle" x="837.68" y="-954.71" font-family="Times,serif" font-size="14.00">/dev/video5</text>
+</g>
+<!-- n00000019 -->
+<g id="node7" class="node">
+<title>n00000019</title>
+<polygon fill="yellow" stroke="black" points="957.87,-527.99 754.87,-527.99 754.87,-489.99 957.87,-489.99 957.87,-527.99"/>
+<text text-anchor="middle" x="856.37" y="-512.79" font-family="Times,serif" font-size="14.00">Intel IPU6 ISYS Capture 6</text>
+<text text-anchor="middle" x="856.37" y="-497.79" font-family="Times,serif" font-size="14.00">/dev/video6</text>
+</g>
+<!-- n0000001d -->
+<g id="node8" class="node">
+<title>n0000001d</title>
+<polygon fill="yellow" stroke="black" points="1291.02,-542.15 1088.02,-542.15 1088.02,-504.15 1291.02,-504.15 1291.02,-542.15"/>
+<text text-anchor="middle" x="1189.52" y="-526.95" font-family="Times,serif" font-size="14.00">Intel IPU6 ISYS Capture 7</text>
+<text text-anchor="middle" x="1189.52" y="-511.95" font-family="Times,serif" font-size="14.00">/dev/video7</text>
+</g>
+<!-- n00000021 -->
+<g id="node9" class="node">
+<title>n00000021</title>
+<polygon fill="yellow" stroke="black" points="202.74,-611.46 -0.26,-611.46 -0.26,-573.46 202.74,-573.46 202.74,-611.46"/>
+<text text-anchor="middle" x="101.24" y="-596.26" font-family="Times,serif" font-size="14.00">Intel IPU6 ISYS Capture 8</text>
+<text text-anchor="middle" x="101.24" y="-581.26" font-family="Times,serif" font-size="14.00">/dev/video8</text>
+</g>
+<!-- n00000025 -->
+<g id="node10" class="node">
+<title>n00000025</title>
+<polygon fill="yellow" stroke="black" points="764.86,-637.89 561.86,-637.89 561.86,-599.89 764.86,-599.89 764.86,-637.89"/>
+<text text-anchor="middle" x="663.36" y="-622.69" font-family="Times,serif" font-size="14.00">Intel IPU6 ISYS Capture 9</text>
+<text text-anchor="middle" x="663.36" y="-607.69" font-family="Times,serif" font-size="14.00">/dev/video9</text>
+</g>
+<!-- n00000029 -->
+<g id="node11" class="node">
+<title>n00000029</title>
+<polygon fill="yellow" stroke="black" points="358.62,-519.5 146.62,-519.5 146.62,-481.5 358.62,-481.5 358.62,-519.5"/>
+<text text-anchor="middle" x="252.62" y="-504.3" font-family="Times,serif" font-size="14.00">Intel IPU6 ISYS Capture 10</text>
+<text text-anchor="middle" x="252.62" y="-489.3" font-family="Times,serif" font-size="14.00">/dev/video10</text>
+</g>
+<!-- n0000002d -->
+<g id="node12" class="node">
+<title>n0000002d</title>
+<polygon fill="yellow" stroke="black" points="481.4,-662.59 269.4,-662.59 269.4,-624.59 481.4,-624.59 481.4,-662.59"/>
+<text text-anchor="middle" x="375.4" y="-647.39" font-family="Times,serif" font-size="14.00">Intel IPU6 ISYS Capture 11</text>
+<text text-anchor="middle" x="375.4" y="-632.39" font-family="Times,serif" font-size="14.00">/dev/video11</text>
+</g>
+<!-- n00000031 -->
+<g id="node13" class="node">
+<title>n00000031</title>
+<polygon fill="yellow" stroke="black" points="637.17,-837.47 425.17,-837.47 425.17,-799.47 637.17,-799.47 637.17,-837.47"/>
+<text text-anchor="middle" x="531.17" y="-822.27" font-family="Times,serif" font-size="14.00">Intel IPU6 ISYS Capture 12</text>
+<text text-anchor="middle" x="531.17" y="-807.27" font-family="Times,serif" font-size="14.00">/dev/video12</text>
+</g>
+<!-- n00000035 -->
+<g id="node14" class="node">
+<title>n00000035</title>
+<polygon fill="yellow" stroke="black" points="337.75,-833.67 125.75,-833.67 125.75,-795.67 337.75,-795.67 337.75,-833.67"/>
+<text text-anchor="middle" x="231.75" y="-818.47" font-family="Times,serif" font-size="14.00">Intel IPU6 ISYS Capture 13</text>
+<text text-anchor="middle" x="231.75" y="-803.47" font-family="Times,serif" font-size="14.00">/dev/video13</text>
+</g>
+<!-- n00000039 -->
+<g id="node15" class="node">
+<title>n00000039</title>
+<polygon fill="yellow" stroke="black" points="393.07,-317.96 181.07,-317.96 181.07,-279.96 393.07,-279.96 393.07,-317.96"/>
+<text text-anchor="middle" x="287.07" y="-302.76" font-family="Times,serif" font-size="14.00">Intel IPU6 ISYS Capture 14</text>
+<text text-anchor="middle" x="287.07" y="-287.76" font-family="Times,serif" font-size="14.00">/dev/video14</text>
+</g>
+<!-- n0000003d -->
+<g id="node16" class="node">
+<title>n0000003d</title>
+<polygon fill="yellow" stroke="black" points="701.46,-391.04 489.46,-391.04 489.46,-353.04 701.46,-353.04 701.46,-391.04"/>
+<text text-anchor="middle" x="595.46" y="-375.84" font-family="Times,serif" font-size="14.00">Intel IPU6 ISYS Capture 15</text>
+<text text-anchor="middle" x="595.46" y="-360.84" font-family="Times,serif" font-size="14.00">/dev/video15</text>
+</g>
+<!-- n00000041 -->
+<g id="node17" class="node">
+<title>n00000041</title>
+<polygon fill="yellow" stroke="black" points="212.45,-1228.8 0.45,-1228.8 0.45,-1190.8 212.45,-1190.8 212.45,-1228.8"/>
+<text text-anchor="middle" x="106.45" y="-1213.6" font-family="Times,serif" font-size="14.00">Intel IPU6 ISYS Capture 16</text>
+<text text-anchor="middle" x="106.45" y="-1198.6" font-family="Times,serif" font-size="14.00">/dev/video16</text>
+</g>
+<!-- n00000045 -->
+<g id="node18" class="node">
+<title>n00000045</title>
+<polygon fill="yellow" stroke="black" points="784.86,-1252.38 572.86,-1252.38 572.86,-1214.38 784.86,-1214.38 784.86,-1252.38"/>
+<text text-anchor="middle" x="678.86" y="-1237.18" font-family="Times,serif" font-size="14.00">Intel IPU6 ISYS Capture 17</text>
+<text text-anchor="middle" x="678.86" y="-1222.18" font-family="Times,serif" font-size="14.00">/dev/video17</text>
+</g>
+<!-- n00000049 -->
+<g id="node19" class="node">
+<title>n00000049</title>
+<polygon fill="yellow" stroke="black" points="503.14,-1169.96 291.14,-1169.96 291.14,-1131.96 503.14,-1131.96 503.14,-1169.96"/>
+<text text-anchor="middle" x="397.14" y="-1154.76" font-family="Times,serif" font-size="14.00">Intel IPU6 ISYS Capture 18</text>
+<text text-anchor="middle" x="397.14" y="-1139.76" font-family="Times,serif" font-size="14.00">/dev/video18</text>
+</g>
+<!-- n0000004d -->
+<g id="node20" class="node">
+<title>n0000004d</title>
+<polygon fill="yellow" stroke="black" points="492.62,-1319.4 280.62,-1319.4 280.62,-1281.4 492.62,-1281.4 492.62,-1319.4"/>
+<text text-anchor="middle" x="386.62" y="-1304.2" font-family="Times,serif" font-size="14.00">Intel IPU6 ISYS Capture 19</text>
+<text text-anchor="middle" x="386.62" y="-1289.2" font-family="Times,serif" font-size="14.00">/dev/video19</text>
+</g>
+<!-- n00000051 -->
+<g id="node21" class="node">
+<title>n00000051</title>
+<polygon fill="yellow" stroke="black" points="680.74,-1464.66 468.74,-1464.66 468.74,-1426.66 680.74,-1426.66 680.74,-1464.66"/>
+<text text-anchor="middle" x="574.74" y="-1449.46" font-family="Times,serif" font-size="14.00">Intel IPU6 ISYS Capture 20</text>
+<text text-anchor="middle" x="574.74" y="-1434.46" font-family="Times,serif" font-size="14.00">/dev/video20</text>
+</g>
+<!-- n00000055 -->
+<g id="node22" class="node">
+<title>n00000055</title>
+<polygon fill="yellow" stroke="black" points="302.42,-1452.56 90.42,-1452.56 90.42,-1414.56 302.42,-1414.56 302.42,-1452.56"/>
+<text text-anchor="middle" x="196.42" y="-1437.36" font-family="Times,serif" font-size="14.00">Intel IPU6 ISYS Capture 21</text>
+<text text-anchor="middle" x="196.42" y="-1422.36" font-family="Times,serif" font-size="14.00">/dev/video21</text>
+</g>
+<!-- n00000059 -->
+<g id="node23" class="node">
+<title>n00000059</title>
+<polygon fill="yellow" stroke="black" points="319.89,-1018.32 107.89,-1018.32 107.89,-980.32 319.89,-980.32 319.89,-1018.32"/>
+<text text-anchor="middle" x="213.89" y="-1003.12" font-family="Times,serif" font-size="14.00">Intel IPU6 ISYS Capture 22</text>
+<text text-anchor="middle" x="213.89" y="-988.12" font-family="Times,serif" font-size="14.00">/dev/video22</text>
+</g>
+<!-- n0000005d -->
+<g id="node24" class="node">
+<title>n0000005d</title>
+<polygon fill="yellow" stroke="black" points="692.62,-1031.39 480.62,-1031.39 480.62,-993.39 692.62,-993.39 692.62,-1031.39"/>
+<text text-anchor="middle" x="586.62" y="-1016.19" font-family="Times,serif" font-size="14.00">Intel IPU6 ISYS Capture 23</text>
+<text text-anchor="middle" x="586.62" y="-1001.19" font-family="Times,serif" font-size="14.00">/dev/video23</text>
+</g>
+<!-- n00000061 -->
+<g id="node25" class="node">
+<title>n00000061</title>
+<polygon fill="yellow" stroke="black" points="1122.45,-248.8 910.45,-248.8 910.45,-210.8 1122.45,-210.8 1122.45,-248.8"/>
+<text text-anchor="middle" x="1016.45" y="-233.6" font-family="Times,serif" font-size="14.00">Intel IPU6 ISYS Capture 24</text>
+<text text-anchor="middle" x="1016.45" y="-218.6" font-family="Times,serif" font-size="14.00">/dev/video24</text>
+</g>
+<!-- n00000065 -->
+<g id="node26" class="node">
+<title>n00000065</title>
+<polygon fill="yellow" stroke="black" points="1694.86,-272.38 1482.86,-272.38 1482.86,-234.38 1694.86,-234.38 1694.86,-272.38"/>
+<text text-anchor="middle" x="1588.86" y="-257.18" font-family="Times,serif" font-size="14.00">Intel IPU6 ISYS Capture 25</text>
+<text text-anchor="middle" x="1588.86" y="-242.18" font-family="Times,serif" font-size="14.00">/dev/video25</text>
+</g>
+<!-- n00000069 -->
+<g id="node27" class="node">
+<title>n00000069</title>
+<polygon fill="yellow" stroke="black" points="1413.14,-189.96 1201.14,-189.96 1201.14,-151.96 1413.14,-151.96 1413.14,-189.96"/>
+<text text-anchor="middle" x="1307.14" y="-174.76" font-family="Times,serif" font-size="14.00">Intel IPU6 ISYS Capture 26</text>
+<text text-anchor="middle" x="1307.14" y="-159.76" font-family="Times,serif" font-size="14.00">/dev/video26</text>
+</g>
+<!-- n0000006d -->
+<g id="node28" class="node">
+<title>n0000006d</title>
+<polygon fill="yellow" stroke="black" points="1402.62,-339.4 1190.62,-339.4 1190.62,-301.4 1402.62,-301.4 1402.62,-339.4"/>
+<text text-anchor="middle" x="1296.62" y="-324.2" font-family="Times,serif" font-size="14.00">Intel IPU6 ISYS Capture 27</text>
+<text text-anchor="middle" x="1296.62" y="-309.2" font-family="Times,serif" font-size="14.00">/dev/video27</text>
+</g>
+<!-- n00000071 -->
+<g id="node29" class="node">
+<title>n00000071</title>
+<polygon fill="yellow" stroke="black" points="1590.74,-484.66 1378.74,-484.66 1378.74,-446.66 1590.74,-446.66 1590.74,-484.66"/>
+<text text-anchor="middle" x="1484.74" y="-469.46" font-family="Times,serif" font-size="14.00">Intel IPU6 ISYS Capture 28</text>
+<text text-anchor="middle" x="1484.74" y="-454.46" font-family="Times,serif" font-size="14.00">/dev/video28</text>
+</g>
+<!-- n00000075 -->
+<g id="node30" class="node">
+<title>n00000075</title>
+<polygon fill="yellow" stroke="black" points="1212.42,-472.56 1000.42,-472.56 1000.42,-434.56 1212.42,-434.56 1212.42,-472.56"/>
+<text text-anchor="middle" x="1106.42" y="-457.36" font-family="Times,serif" font-size="14.00">Intel IPU6 ISYS Capture 29</text>
+<text text-anchor="middle" x="1106.42" y="-442.36" font-family="Times,serif" font-size="14.00">/dev/video29</text>
+</g>
+<!-- n00000079 -->
+<g id="node31" class="node">
+<title>n00000079</title>
+<polygon fill="yellow" stroke="black" points="1229.89,-38.32 1017.89,-38.32 1017.89,-0.32 1229.89,-0.32 1229.89,-38.32"/>
+<text text-anchor="middle" x="1123.89" y="-23.12" font-family="Times,serif" font-size="14.00">Intel IPU6 ISYS Capture 30</text>
+<text text-anchor="middle" x="1123.89" y="-8.12" font-family="Times,serif" font-size="14.00">/dev/video30</text>
+</g>
+<!-- n0000007d -->
+<g id="node32" class="node">
+<title>n0000007d</title>
+<polygon fill="yellow" stroke="black" points="1602.62,-51.39 1390.62,-51.39 1390.62,-13.39 1602.62,-13.39 1602.62,-51.39"/>
+<text text-anchor="middle" x="1496.62" y="-36.19" font-family="Times,serif" font-size="14.00">Intel IPU6 ISYS Capture 31</text>
+<text text-anchor="middle" x="1496.62" y="-21.19" font-family="Times,serif" font-size="14.00">/dev/video31</text>
+</g>
+<!-- n00000081 -->
+<g id="node33" class="node">
+<title>n00000081</title>
+<path fill="green" stroke="black" d="M924.28,-700.28C924.28,-700.28 1108.28,-700.28 1108.28,-700.28 1114.28,-700.28 1120.28,-706.28 1120.28,-712.28 1120.28,-712.28 1120.28,-772.28 1120.28,-772.28 1120.28,-778.28 1114.28,-784.28 1108.28,-784.28 1108.28,-784.28 924.28,-784.28 924.28,-784.28 918.28,-784.28 912.28,-778.28 912.28,-772.28 912.28,-772.28 912.28,-712.28 912.28,-712.28 912.28,-706.28 918.28,-700.28 924.28,-700.28"/>
+<text text-anchor="middle" x="1016.28" y="-769.08" font-family="Times,serif" font-size="14.00">0</text>
+<polyline fill="none" stroke="black" points="912.28,-761.28 1120.28,-761.28 "/>
+<text text-anchor="middle" x="1016.28" y="-746.08" font-family="Times,serif" font-size="14.00">Intel IPU6 CSI2 0</text>
+<text text-anchor="middle" x="1016.28" y="-731.08" font-family="Times,serif" font-size="14.00">/dev/v4l&#45;subdev0</text>
+<polyline fill="none" stroke="black" points="912.28,-723.28 1120.28,-723.28 "/>
+<text text-anchor="middle" x="925.28" y="-708.08" font-family="Times,serif" font-size="14.00">1</text>
+<polyline fill="none" stroke="black" points="938.28,-700.28 938.28,-723.28 "/>
+<text text-anchor="middle" x="951.28" y="-708.08" font-family="Times,serif" font-size="14.00">2</text>
+<polyline fill="none" stroke="black" points="964.28,-700.28 964.28,-723.28 "/>
+<text text-anchor="middle" x="977.28" y="-708.08" font-family="Times,serif" font-size="14.00">3</text>
+<polyline fill="none" stroke="black" points="990.28,-700.28 990.28,-723.28 "/>
+<text text-anchor="middle" x="1003.28" y="-708.08" font-family="Times,serif" font-size="14.00">4</text>
+<polyline fill="none" stroke="black" points="1016.28,-700.28 1016.28,-723.28 "/>
+<text text-anchor="middle" x="1029.28" y="-708.08" font-family="Times,serif" font-size="14.00">5</text>
+<polyline fill="none" stroke="black" points="1042.28,-700.28 1042.28,-723.28 "/>
+<text text-anchor="middle" x="1055.28" y="-708.08" font-family="Times,serif" font-size="14.00">6</text>
+<polyline fill="none" stroke="black" points="1068.28,-700.28 1068.28,-723.28 "/>
+<text text-anchor="middle" x="1081.28" y="-708.08" font-family="Times,serif" font-size="14.00">7</text>
+<polyline fill="none" stroke="black" points="1094.28,-700.28 1094.28,-723.28 "/>
+<text text-anchor="middle" x="1107.28" y="-708.08" font-family="Times,serif" font-size="14.00">8</text>
+</g>
+<!-- n00000081&#45;&gt;n00000001 -->
+<g id="edge1" class="edge">
+<title>n00000081:port1&#45;&gt;n00000001</title>
+<path fill="none" stroke="black" stroke-dasharray="5,2" d="M912.28,-711.28C912.28,-711.28 880.33,-714.78 843.28,-718.84"/>
+<polygon fill="black" stroke="black" points="842.81,-715.37 833.25,-719.94 843.57,-722.33 842.81,-715.37"/>
+</g>
+<!-- n00000081&#45;&gt;n00000005 -->
+<g id="edge2" class="edge">
+<title>n00000081:port2&#45;&gt;n00000005</title>
+<path fill="none" stroke="black" stroke-dasharray="5,2" d="M951.38,-700.28C951.38,-700.28 1086.18,-688.61 1123.48,-697.08 1155.93,-704.45 1158.99,-719.67 1190.39,-730.68 1190.49,-730.71 1190.59,-730.75 1190.69,-730.78"/>
+<polygon fill="black" stroke="black" points="1189.45,-734.06 1200.05,-733.86 1191.64,-727.41 1189.45,-734.06"/>
+</g>
+<!-- n00000081&#45;&gt;n00000009 -->
+<g id="edge3" class="edge">
+<title>n00000081:port3&#45;&gt;n00000009</title>
+<path fill="none" stroke="black" stroke-dasharray="5,2" d="M977.28,-700.28C977.28,-700.28 979.31,-698.81 982.45,-696.54"/>
+<polygon fill="black" stroke="black" points="984.7,-699.23 990.74,-690.53 980.59,-693.56 984.7,-699.23"/>
+</g>
+<!-- n00000081&#45;&gt;n0000000d -->
+<g id="edge4" class="edge">
+<title>n00000081:port4&#45;&gt;n0000000d</title>
+<path fill="none" stroke="black" stroke-dasharray="5,2" d="M1003.38,-700.26C1003.38,-700.26 916.62,-689.8 909.08,-697.08 880.2,-725.01 885.68,-754.82 909.08,-787.48 910.88,-789.99 918.96,-793.59 929.7,-797.47"/>
+<polygon fill="black" stroke="black" points="928.69,-800.82 939.28,-800.79 930.98,-794.21 928.69,-800.82"/>
+</g>
+<!-- n00000081&#45;&gt;n00000011 -->
+<g id="edge5" class="edge">
+<title>n00000081:port5&#45;&gt;n00000011</title>
+<path fill="none" stroke="black" stroke-dasharray="5,2" d="M1029.19,-700.26C1029.19,-700.26 1115.28,-690.56 1123.48,-697.08 1198.37,-756.64 1190.55,-886.51 1182.64,-944.71"/>
+<polygon fill="black" stroke="black" points="1179.16,-944.31 1181.18,-954.71 1186.09,-945.32 1179.16,-944.31"/>
+</g>
+<!-- n00000081&#45;&gt;n00000015 -->
+<g id="edge6" class="edge">
+<title>n00000081:port6&#45;&gt;n00000015</title>
+<path fill="none" stroke="black" stroke-dasharray="5,2" d="M1055.18,-700.28C1055.18,-700.28 915.57,-692.2 909.08,-697.08 834.02,-753.51 831.79,-879.34 835.06,-936.56"/>
+<polygon fill="black" stroke="black" points="831.58,-936.99 835.74,-946.73 838.56,-936.52 831.58,-936.99"/>
+</g>
+<!-- n00000081&#45;&gt;n00000019 -->
+<g id="edge7" class="edge">
+<title>n00000081:port7&#45;&gt;n00000019</title>
+<path fill="none" stroke="black" stroke-dasharray="5,2" d="M1081.28,-700.28C1081.28,-700.28 916.04,-696.54 912.32,-693.67 864.52,-656.73 856.3,-580.22 855.62,-538.2"/>
+<polygon fill="black" stroke="black" points="859.11,-538.05 855.59,-528.06 852.11,-538.07 859.11,-538.05"/>
+</g>
+<!-- n00000081&#45;&gt;n0000001d -->
+<g id="edge8" class="edge">
+<title>n00000081:port8&#45;&gt;n0000001d</title>
+<path fill="none" stroke="black" stroke-dasharray="5,2" d="M1107.28,-700.28C1107.28,-700.28 1119.29,-696.23 1121.72,-693.67 1159.76,-653.62 1177.38,-589.6 1184.78,-552.46"/>
+<polygon fill="black" stroke="black" points="1188.29,-552.76 1186.69,-542.29 1181.41,-551.47 1188.29,-552.76"/>
+</g>
+<!-- n0000008b -->
+<g id="node34" class="node">
+<title>n0000008b</title>
+<path fill="green" stroke="black" d="M293.1,-532.08C293.1,-532.08 477.1,-532.08 477.1,-532.08 483.1,-532.08 489.1,-538.08 489.1,-544.08 489.1,-544.08 489.1,-604.08 489.1,-604.08 489.1,-610.08 483.1,-616.08 477.1,-616.08 477.1,-616.08 293.1,-616.08 293.1,-616.08 287.1,-616.08 281.1,-610.08 281.1,-604.08 281.1,-604.08 281.1,-544.08 281.1,-544.08 281.1,-538.08 287.1,-532.08 293.1,-532.08"/>
+<text text-anchor="middle" x="385.1" y="-600.88" font-family="Times,serif" font-size="14.00">0</text>
+<polyline fill="none" stroke="black" points="281.1,-593.08 489.1,-593.08 "/>
+<text text-anchor="middle" x="385.1" y="-577.88" font-family="Times,serif" font-size="14.00">Intel IPU6 CSI2 1</text>
+<text text-anchor="middle" x="385.1" y="-562.88" font-family="Times,serif" font-size="14.00">/dev/v4l&#45;subdev1</text>
+<polyline fill="none" stroke="black" points="281.1,-555.08 489.1,-555.08 "/>
+<text text-anchor="middle" x="294.1" y="-539.88" font-family="Times,serif" font-size="14.00">1</text>
+<polyline fill="none" stroke="black" points="307.1,-532.08 307.1,-555.08 "/>
+<text text-anchor="middle" x="320.1" y="-539.88" font-family="Times,serif" font-size="14.00">2</text>
+<polyline fill="none" stroke="black" points="333.1,-532.08 333.1,-555.08 "/>
+<text text-anchor="middle" x="346.1" y="-539.88" font-family="Times,serif" font-size="14.00">3</text>
+<polyline fill="none" stroke="black" points="359.1,-532.08 359.1,-555.08 "/>
+<text text-anchor="middle" x="372.1" y="-539.88" font-family="Times,serif" font-size="14.00">4</text>
+<polyline fill="none" stroke="black" points="385.1,-532.08 385.1,-555.08 "/>
+<text text-anchor="middle" x="398.1" y="-539.88" font-family="Times,serif" font-size="14.00">5</text>
+<polyline fill="none" stroke="black" points="411.1,-532.08 411.1,-555.08 "/>
+<text text-anchor="middle" x="424.1" y="-539.88" font-family="Times,serif" font-size="14.00">6</text>
+<polyline fill="none" stroke="black" points="437.1,-532.08 437.1,-555.08 "/>
+<text text-anchor="middle" x="450.1" y="-539.88" font-family="Times,serif" font-size="14.00">7</text>
+<polyline fill="none" stroke="black" points="463.1,-532.08 463.1,-555.08 "/>
+<text text-anchor="middle" x="476.1" y="-539.88" font-family="Times,serif" font-size="14.00">8</text>
+</g>
+<!-- n0000008b&#45;&gt;n00000021 -->
+<g id="edge9" class="edge">
+<title>n0000008b:port1&#45;&gt;n00000021</title>
+<path fill="none" stroke="black" d="M281.1,-543.08C281.1,-543.08 240.1,-560.51 205.94,-570.26 205.35,-570.43 204.77,-570.59 204.18,-570.76"/>
+<polygon fill="black" stroke="black" points="203.2,-567.39 194.47,-573.39 205.03,-574.15 203.2,-567.39"/>
+</g>
+<!-- n0000008b&#45;&gt;n00000025 -->
+<g id="edge10" class="edge">
+<title>n0000008b:port2&#45;&gt;n00000025</title>
+<path fill="none" stroke="black" d="M320.2,-532.07C320.2,-532.07 456.9,-514.37 492.3,-528.88 528.42,-543.68 522.86,-571.78 556.11,-594.53"/>
+<polygon fill="black" stroke="black" points="554.54,-597.67 564.9,-599.88 558.18,-591.69 554.54,-597.67"/>
+</g>
+<!-- n0000008b&#45;&gt;n00000029 -->
+<g id="edge11" class="edge">
+<title>n0000008b:port3&#45;&gt;n00000029</title>
+<path fill="none" stroke="black" stroke-dasharray="5,2" d="M346.1,-532.08C346.1,-532.08 333.93,-527.96 318.37,-522.71"/>
+<polygon fill="black" stroke="black" points="319.48,-519.39 308.88,-519.5 317.24,-526.02 319.48,-519.39"/>
+</g>
+<!-- n0000008b&#45;&gt;n0000002d -->
+<g id="edge12" class="edge">
+<title>n0000008b:port4&#45;&gt;n0000002d</title>
+<path fill="none" stroke="black" stroke-dasharray="5,2" d="M372.19,-532.05C372.19,-532.05 292.97,-514.3 277.9,-528.88 249.01,-556.8 253.16,-587.62 277.9,-619.28 278.34,-619.85 280.33,-620.69 283.45,-621.71"/>
+<polygon fill="black" stroke="black" points="282.71,-625.14 293.29,-624.58 284.67,-618.42 282.71,-625.14"/>
+</g>
+<!-- n0000008b&#45;&gt;n00000031 -->
+<g id="edge13" class="edge">
+<title>n0000008b:port5&#45;&gt;n00000031</title>
+<path fill="none" stroke="black" stroke-dasharray="5,2" d="M398,-532.05C398,-532.05 476.28,-515.34 492.3,-528.88 568.49,-593.29 550.55,-729.67 538.14,-789.41"/>
+<polygon fill="black" stroke="black" points="534.69,-788.79 535.99,-799.31 541.53,-790.28 534.69,-788.79"/>
+</g>
+<!-- n0000008b&#45;&gt;n00000035 -->
+<g id="edge14" class="edge">
+<title>n0000008b:port6&#45;&gt;n00000035</title>
+<path fill="none" stroke="black" stroke-dasharray="5,2" d="M424,-532.07C424,-532.07 290.37,-518.48 277.9,-528.88 202.27,-591.86 215.34,-725.69 225.66,-785.15"/>
+<polygon fill="black" stroke="black" points="222.29,-786.14 227.54,-795.35 229.17,-784.88 222.29,-786.14"/>
+</g>
+<!-- n0000008b&#45;&gt;n00000039 -->
+<g id="edge15" class="edge">
+<title>n0000008b:port7&#45;&gt;n00000039</title>
+<path fill="none" stroke="black" stroke-dasharray="5,2" d="M450.1,-532.08C450.1,-532.08 395.22,-528.13 383.45,-518.65 375.46,-512.21 322.64,-385.46 298.76,-327.47"/>
+<polygon fill="black" stroke="black" points="301.96,-326.05 294.92,-318.14 295.49,-328.72 301.96,-326.05"/>
+</g>
+<!-- n0000008b&#45;&gt;n0000003d -->
+<g id="edge16" class="edge">
+<title>n0000008b:port8&#45;&gt;n0000003d</title>
+<path fill="none" stroke="black" stroke-dasharray="5,2" d="M476.1,-532.08C476.1,-532.08 522.37,-522.39 526.85,-518.65 563.15,-488.33 581.38,-434.52 589.6,-401.2"/>
+<polygon fill="black" stroke="black" points="593.08,-401.69 591.93,-391.16 586.26,-400.11 593.08,-401.69"/>
+</g>
+<!-- n00000095 -->
+<g id="node35" class="node">
+<title>n00000095</title>
+<path fill="green" stroke="black" d="M301.38,-1180.11C301.38,-1180.11 485.38,-1180.11 485.38,-1180.11 491.38,-1180.11 497.38,-1186.11 497.38,-1192.11 497.38,-1192.11 497.38,-1252.11 497.38,-1252.11 497.38,-1258.11 491.38,-1264.11 485.38,-1264.11 485.38,-1264.11 301.38,-1264.11 301.38,-1264.11 295.38,-1264.11 289.38,-1258.11 289.38,-1252.11 289.38,-1252.11 289.38,-1192.11 289.38,-1192.11 289.38,-1186.11 295.38,-1180.11 301.38,-1180.11"/>
+<text text-anchor="middle" x="393.38" y="-1248.91" font-family="Times,serif" font-size="14.00">0</text>
+<polyline fill="none" stroke="black" points="289.38,-1241.11 497.38,-1241.11 "/>
+<text text-anchor="middle" x="393.38" y="-1225.91" font-family="Times,serif" font-size="14.00">Intel IPU6 CSI2 2</text>
+<text text-anchor="middle" x="393.38" y="-1210.91" font-family="Times,serif" font-size="14.00">/dev/v4l&#45;subdev2</text>
+<polyline fill="none" stroke="black" points="289.38,-1203.11 497.38,-1203.11 "/>
+<text text-anchor="middle" x="302.38" y="-1187.91" font-family="Times,serif" font-size="14.00">1</text>
+<polyline fill="none" stroke="black" points="315.38,-1180.11 315.38,-1203.11 "/>
+<text text-anchor="middle" x="328.38" y="-1187.91" font-family="Times,serif" font-size="14.00">2</text>
+<polyline fill="none" stroke="black" points="341.38,-1180.11 341.38,-1203.11 "/>
+<text text-anchor="middle" x="354.38" y="-1187.91" font-family="Times,serif" font-size="14.00">3</text>
+<polyline fill="none" stroke="black" points="367.38,-1180.11 367.38,-1203.11 "/>
+<text text-anchor="middle" x="380.38" y="-1187.91" font-family="Times,serif" font-size="14.00">4</text>
+<polyline fill="none" stroke="black" points="393.38,-1180.11 393.38,-1203.11 "/>
+<text text-anchor="middle" x="406.38" y="-1187.91" font-family="Times,serif" font-size="14.00">5</text>
+<polyline fill="none" stroke="black" points="419.38,-1180.11 419.38,-1203.11 "/>
+<text text-anchor="middle" x="432.38" y="-1187.91" font-family="Times,serif" font-size="14.00">6</text>
+<polyline fill="none" stroke="black" points="445.38,-1180.11 445.38,-1203.11 "/>
+<text text-anchor="middle" x="458.38" y="-1187.91" font-family="Times,serif" font-size="14.00">7</text>
+<polyline fill="none" stroke="black" points="471.38,-1180.11 471.38,-1203.11 "/>
+<text text-anchor="middle" x="484.38" y="-1187.91" font-family="Times,serif" font-size="14.00">8</text>
+</g>
+<!-- n00000095&#45;&gt;n00000041 -->
+<g id="edge17" class="edge">
+<title>n00000095:port1&#45;&gt;n00000041</title>
+<path fill="none" stroke="black" stroke-dasharray="5,2" d="M289.38,-1191.11C289.38,-1191.11 258.94,-1194.22 222.89,-1197.91"/>
+<polygon fill="black" stroke="black" points="222.19,-1194.46 212.6,-1198.96 222.9,-1201.42 222.19,-1194.46"/>
+</g>
+<!-- n00000095&#45;&gt;n00000045 -->
+<g id="edge18" class="edge">
+<title>n00000095:port2&#45;&gt;n00000045</title>
+<path fill="none" stroke="black" stroke-dasharray="5,2" d="M328.48,-1180.11C328.48,-1180.11 463.26,-1168.53 500.58,-1176.91 534.02,-1184.43 537.24,-1200.06 569.66,-1211.18 569.76,-1211.22 569.86,-1211.25 569.96,-1211.29"/>
+<polygon fill="black" stroke="black" points="568.86,-1214.61 579.45,-1214.34 571,-1207.95 568.86,-1214.61"/>
+</g>
+<!-- n00000095&#45;&gt;n00000049 -->
+<g id="edge19" class="edge">
+<title>n00000095:port3&#45;&gt;n00000049</title>
+<path fill="none" stroke="black" stroke-dasharray="5,2" d="M354.38,-1180.11C354.38,-1180.11 356.8,-1178.46 360.49,-1175.94"/>
+<polygon fill="black" stroke="black" points="362.56,-1178.77 368.86,-1170.24 358.62,-1172.98 362.56,-1178.77"/>
+</g>
+<!-- n00000095&#45;&gt;n0000004d -->
+<g id="edge20" class="edge">
+<title>n00000095:port4&#45;&gt;n0000004d</title>
+<path fill="none" stroke="black" stroke-dasharray="5,2" d="M380.47,-1180.09C380.47,-1180.09 293.71,-1169.63 286.18,-1176.91 257.29,-1204.84 262.63,-1234.76 286.18,-1267.31 288.16,-1270.05 297.33,-1273.96 309.38,-1278.13"/>
+<polygon fill="black" stroke="black" points="308.49,-1281.53 319.09,-1281.36 310.7,-1274.88 308.49,-1281.53"/>
+</g>
+<!-- n00000095&#45;&gt;n00000051 -->
+<g id="edge21" class="edge">
+<title>n00000095:port5&#45;&gt;n00000051</title>
+<path fill="none" stroke="black" stroke-dasharray="5,2" d="M406.28,-1180.09C406.28,-1180.09 492.13,-1170.7 500.58,-1176.91 576.41,-1232.66 579.83,-1358.79 577.09,-1416.2"/>
+<polygon fill="black" stroke="black" points="573.59,-1416.23 576.51,-1426.41 580.58,-1416.63 573.59,-1416.23"/>
+</g>
+<!-- n00000095&#45;&gt;n00000055 -->
+<g id="edge22" class="edge">
+<title>n00000095:port6&#45;&gt;n00000055</title>
+<path fill="none" stroke="black" stroke-dasharray="5,2" d="M432.28,-1180.11C432.28,-1180.11 292.85,-1172.29 286.18,-1176.91 211.26,-1228.86 198.3,-1348.49 196.45,-1404.12"/>
+<polygon fill="black" stroke="black" points="192.94,-1404.28 196.21,-1414.36 199.94,-1404.44 192.94,-1404.28"/>
+</g>
+<!-- n00000095&#45;&gt;n00000059 -->
+<g id="edge23" class="edge">
+<title>n00000095:port7&#45;&gt;n00000059</title>
+<path fill="none" stroke="black" stroke-dasharray="5,2" d="M458.38,-1180.11C458.38,-1180.11 291.84,-1175.85 287.94,-1173.16 239.87,-1139.96 222.85,-1068.83 216.94,-1028.6"/>
+<polygon fill="black" stroke="black" points="220.39,-1028.06 215.6,-1018.61 213.46,-1028.98 220.39,-1028.06"/>
+</g>
+<!-- n00000095&#45;&gt;n0000005d -->
+<g id="edge24" class="edge">
+<title>n00000095:port8&#45;&gt;n0000005d</title>
+<path fill="none" stroke="black" stroke-dasharray="5,2" d="M484.38,-1180.11C484.38,-1180.11 502.45,-1176.49 506.34,-1173.16 547.25,-1138.2 569.47,-1077.38 579.62,-1041.41"/>
+<polygon fill="black" stroke="black" points="583.06,-1042.09 582.28,-1031.53 576.3,-1040.27 583.06,-1042.09"/>
+</g>
+<!-- n0000009f -->
+<g id="node36" class="node">
+<title>n0000009f</title>
+<path fill="green" stroke="black" d="M1211.38,-200.11C1211.38,-200.11 1395.38,-200.11 1395.38,-200.11 1401.38,-200.11 1407.38,-206.11 1407.38,-212.11 1407.38,-212.11 1407.38,-272.11 1407.38,-272.11 1407.38,-278.11 1401.38,-284.11 1395.38,-284.11 1395.38,-284.11 1211.38,-284.11 1211.38,-284.11 1205.38,-284.11 1199.38,-278.11 1199.38,-272.11 1199.38,-272.11 1199.38,-212.11 1199.38,-212.11 1199.38,-206.11 1205.38,-200.11 1211.38,-200.11"/>
+<text text-anchor="middle" x="1303.38" y="-268.91" font-family="Times,serif" font-size="14.00">0</text>
+<polyline fill="none" stroke="black" points="1199.38,-261.11 1407.38,-261.11 "/>
+<text text-anchor="middle" x="1303.38" y="-245.91" font-family="Times,serif" font-size="14.00">Intel IPU6 CSI2 3</text>
+<text text-anchor="middle" x="1303.38" y="-230.91" font-family="Times,serif" font-size="14.00">/dev/v4l&#45;subdev3</text>
+<polyline fill="none" stroke="black" points="1199.38,-223.11 1407.38,-223.11 "/>
+<text text-anchor="middle" x="1212.38" y="-207.91" font-family="Times,serif" font-size="14.00">1</text>
+<polyline fill="none" stroke="black" points="1225.38,-200.11 1225.38,-223.11 "/>
+<text text-anchor="middle" x="1238.38" y="-207.91" font-family="Times,serif" font-size="14.00">2</text>
+<polyline fill="none" stroke="black" points="1251.38,-200.11 1251.38,-223.11 "/>
+<text text-anchor="middle" x="1264.38" y="-207.91" font-family="Times,serif" font-size="14.00">3</text>
+<polyline fill="none" stroke="black" points="1277.38,-200.11 1277.38,-223.11 "/>
+<text text-anchor="middle" x="1290.38" y="-207.91" font-family="Times,serif" font-size="14.00">4</text>
+<polyline fill="none" stroke="black" points="1303.38,-200.11 1303.38,-223.11 "/>
+<text text-anchor="middle" x="1316.38" y="-207.91" font-family="Times,serif" font-size="14.00">5</text>
+<polyline fill="none" stroke="black" points="1329.38,-200.11 1329.38,-223.11 "/>
+<text text-anchor="middle" x="1342.38" y="-207.91" font-family="Times,serif" font-size="14.00">6</text>
+<polyline fill="none" stroke="black" points="1355.38,-200.11 1355.38,-223.11 "/>
+<text text-anchor="middle" x="1368.38" y="-207.91" font-family="Times,serif" font-size="14.00">7</text>
+<polyline fill="none" stroke="black" points="1381.38,-200.11 1381.38,-223.11 "/>
+<text text-anchor="middle" x="1394.38" y="-207.91" font-family="Times,serif" font-size="14.00">8</text>
+</g>
+<!-- n0000009f&#45;&gt;n00000061 -->
+<g id="edge25" class="edge">
+<title>n0000009f:port1&#45;&gt;n00000061</title>
+<path fill="none" stroke="black" stroke-dasharray="5,2" d="M1199.38,-211.11C1199.38,-211.11 1168.94,-214.22 1132.89,-217.91"/>
+<polygon fill="black" stroke="black" points="1132.19,-214.46 1122.6,-218.96 1132.9,-221.42 1132.19,-214.46"/>
+</g>
+<!-- n0000009f&#45;&gt;n00000065 -->
+<g id="edge26" class="edge">
+<title>n0000009f:port2&#45;&gt;n00000065</title>
+<path fill="none" stroke="black" stroke-dasharray="5,2" d="M1238.48,-200.11C1238.48,-200.11 1373.26,-188.53 1410.58,-196.91 1444.02,-204.43 1447.24,-220.06 1479.66,-231.18 1479.76,-231.22 1479.86,-231.25 1479.96,-231.29"/>
+<polygon fill="black" stroke="black" points="1478.86,-234.61 1489.45,-234.34 1481,-227.95 1478.86,-234.61"/>
+</g>
+<!-- n0000009f&#45;&gt;n00000069 -->
+<g id="edge27" class="edge">
+<title>n0000009f:port3&#45;&gt;n00000069</title>
+<path fill="none" stroke="black" stroke-dasharray="5,2" d="M1264.38,-200.11C1264.38,-200.11 1266.8,-198.46 1270.49,-195.94"/>
+<polygon fill="black" stroke="black" points="1272.56,-198.77 1278.86,-190.24 1268.62,-192.98 1272.56,-198.77"/>
+</g>
+<!-- n0000009f&#45;&gt;n0000006d -->
+<g id="edge28" class="edge">
+<title>n0000009f:port4&#45;&gt;n0000006d</title>
+<path fill="none" stroke="black" stroke-dasharray="5,2" d="M1290.47,-200.09C1290.47,-200.09 1203.71,-189.63 1196.18,-196.91 1167.29,-224.84 1172.63,-254.76 1196.18,-287.31 1198.16,-290.05 1207.33,-293.96 1219.38,-298.13"/>
+<polygon fill="black" stroke="black" points="1218.49,-301.53 1229.09,-301.36 1220.7,-294.88 1218.49,-301.53"/>
+</g>
+<!-- n0000009f&#45;&gt;n00000071 -->
+<g id="edge29" class="edge">
+<title>n0000009f:port5&#45;&gt;n00000071</title>
+<path fill="none" stroke="black" stroke-dasharray="5,2" d="M1316.28,-200.09C1316.28,-200.09 1402.13,-190.7 1410.58,-196.91 1486.41,-252.66 1489.83,-378.79 1487.09,-436.2"/>
+<polygon fill="black" stroke="black" points="1483.59,-436.23 1486.51,-446.41 1490.58,-436.63 1483.59,-436.23"/>
+</g>
+<!-- n0000009f&#45;&gt;n00000075 -->
+<g id="edge30" class="edge">
+<title>n0000009f:port6&#45;&gt;n00000075</title>
+<path fill="none" stroke="black" stroke-dasharray="5,2" d="M1342.28,-200.11C1342.28,-200.11 1202.85,-192.29 1196.18,-196.91 1121.26,-248.86 1108.3,-368.49 1106.45,-424.12"/>
+<polygon fill="black" stroke="black" points="1102.94,-424.28 1106.21,-434.36 1109.94,-424.44 1102.94,-424.28"/>
+</g>
+<!-- n0000009f&#45;&gt;n00000079 -->
+<g id="edge31" class="edge">
+<title>n0000009f:port7&#45;&gt;n00000079</title>
+<path fill="none" stroke="black" stroke-dasharray="5,2" d="M1368.38,-200.11C1368.38,-200.11 1201.84,-195.85 1197.94,-193.16 1149.87,-159.96 1132.85,-88.83 1126.94,-48.6"/>
+<polygon fill="black" stroke="black" points="1130.39,-48.06 1125.6,-38.61 1123.46,-48.98 1130.39,-48.06"/>
+</g>
+<!-- n0000009f&#45;&gt;n0000007d -->
+<g id="edge32" class="edge">
+<title>n0000009f:port8&#45;&gt;n0000007d</title>
+<path fill="none" stroke="black" stroke-dasharray="5,2" d="M1394.38,-200.11C1394.38,-200.11 1412.45,-196.49 1416.34,-193.16 1457.25,-158.2 1479.47,-97.38 1489.62,-61.41"/>
+<polygon fill="black" stroke="black" points="1493.06,-62.09 1492.28,-51.53 1486.3,-60.27 1493.06,-62.09"/>
+</g>
+<!-- n000000e9 -->
+<g id="node37" class="node">
+<title>n000000e9</title>
+<path fill="green" stroke="black" d="M398.65,-431.45C398.65,-431.45 511.65,-431.45 511.65,-431.45 517.65,-431.45 523.65,-437.45 523.65,-443.45 523.65,-443.45 523.65,-503.45 523.65,-503.45 523.65,-509.45 517.65,-515.45 511.65,-515.45 511.65,-515.45 398.65,-515.45 398.65,-515.45 392.65,-515.45 386.65,-509.45 386.65,-503.45 386.65,-503.45 386.65,-443.45 386.65,-443.45 386.65,-437.45 392.65,-431.45 398.65,-431.45"/>
+<text text-anchor="middle" x="420.65" y="-500.25" font-family="Times,serif" font-size="14.00">1</text>
+<polyline fill="none" stroke="black" points="454.65,-492.45 454.65,-515.45 "/>
+<text text-anchor="middle" x="489.15" y="-500.25" font-family="Times,serif" font-size="14.00">2</text>
+<polyline fill="none" stroke="black" points="386.65,-492.45 523.65,-492.45 "/>
+<text text-anchor="middle" x="455.15" y="-477.25" font-family="Times,serif" font-size="14.00">ov2740 4&#45;0036</text>
+<text text-anchor="middle" x="455.15" y="-462.25" font-family="Times,serif" font-size="14.00">/dev/v4l&#45;subdev4</text>
+<polyline fill="none" stroke="black" points="386.65,-454.45 523.65,-454.45 "/>
+<text text-anchor="middle" x="455.15" y="-439.25" font-family="Times,serif" font-size="14.00">0</text>
+</g>
+<!-- n000000e9&#45;&gt;n0000008b -->
+<g id="edge33" class="edge">
+<title>n000000e9:port0&#45;&gt;n0000008b:port0</title>
+<path fill="none" stroke="black" stroke-width="2" d="M386.14,-442.55C386.14,-442.55 361.11,-493.23 383.45,-518.65 391.47,-527.78 484.31,-519.72 492.3,-528.88 508.64,-547.6 499.26,-579.87 493.12,-595.68"/>
+<polygon fill="black" stroke="black" stroke-width="2" points="489.86,-594.41 489.11,-604.98 496.29,-597.19 489.86,-594.41"/>
+</g>
+</g>
+</svg>
diff --git a/Documentation/admin-guide/media/ivtv.rst b/Documentation/admin-guide/media/ivtv.rst
index 101f16d0263e..8b65ac3f5321 100644
--- a/Documentation/admin-guide/media/ivtv.rst
+++ b/Documentation/admin-guide/media/ivtv.rst
@@ -3,7 +3,7 @@
The ivtv driver
===============
-Author: Hans Verkuil <hverkuil@xs4all.nl>
+Author: Hans Verkuil <hverkuil@kernel.org>
This is a v4l2 device driver for the Conexant cx23415/6 MPEG encoder/decoder.
The cx23415 can do both encoding and decoding, the cx23416 can only do MPEG
diff --git a/Documentation/admin-guide/media/mali-c55-graph.dot b/Documentation/admin-guide/media/mali-c55-graph.dot
new file mode 100644
index 000000000000..0775ba42bf4c
--- /dev/null
+++ b/Documentation/admin-guide/media/mali-c55-graph.dot
@@ -0,0 +1,19 @@
+digraph board {
+ rankdir=TB
+ n00000001 [label="{{} | mali-c55 tpg\n/dev/v4l-subdev0 | {<port0> 0}}", shape=Mrecord, style=filled, fillcolor=green]
+ n00000001:port0 -> n00000003:port0 [style=dashed]
+ n00000003 [label="{{<port0> 0} | mali-c55 isp\n/dev/v4l-subdev1 | {<port1> 1 | <port2> 2}}", shape=Mrecord, style=filled, fillcolor=green]
+ n00000003:port1 -> n00000007:port0 [style=bold]
+ n00000003:port2 -> n00000007:port2 [style=bold]
+ n00000003:port1 -> n0000000b:port0 [style=bold]
+ n00000007 [label="{{<port0> 0 | <port2> 2} | mali-c55 resizer fr\n/dev/v4l-subdev2 | {<port1> 1}}", shape=Mrecord, style=filled, fillcolor=green]
+ n00000007:port1 -> n0000000e [style=bold]
+ n0000000b [label="{{<port0> 0} | mali-c55 resizer ds\n/dev/v4l-subdev3 | {<port1> 1}}", shape=Mrecord, style=filled, fillcolor=green]
+ n0000000b:port1 -> n00000012 [style=bold]
+ n0000000e [label="mali-c55 fr\n/dev/video0", shape=box, style=filled, fillcolor=yellow]
+ n00000012 [label="mali-c55 ds\n/dev/video1", shape=box, style=filled, fillcolor=yellow]
+ n00000022 [label="{{<port0> 0} | csi2-rx\n/dev/v4l-subdev4 | {<port1> 1}}", shape=Mrecord, style=filled, fillcolor=green]
+ n00000022:port1 -> n00000003:port0
+ n00000027 [label="{{} | imx415 1-001a\n/dev/v4l-subdev5 | {<port0> 0}}", shape=Mrecord, style=filled, fillcolor=green]
+ n00000027:port0 -> n00000022:port0 [style=bold]
+} \ No newline at end of file
diff --git a/Documentation/admin-guide/media/mali-c55.rst b/Documentation/admin-guide/media/mali-c55.rst
new file mode 100644
index 000000000000..315f982000c4
--- /dev/null
+++ b/Documentation/admin-guide/media/mali-c55.rst
@@ -0,0 +1,413 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==========================================
+ARM Mali-C55 Image Signal Processor driver
+==========================================
+
+Introduction
+============
+
+This file documents the driver for ARM's Mali-C55 Image Signal Processor. The
+driver is located under drivers/media/platform/arm/mali-c55.
+
+The Mali-C55 ISP receives data in either raw Bayer format or RGB/YUV format from
+sensors through either a parallel interface or a memory bus before processing it
+and outputting it through an internal DMA engine. Two output pipelines are
+possible (though one may not be fitted, depending on the implementation). These
+are referred to as "Full resolution" and "Downscale", but the naming is historic
+and both pipes are capable of cropping/scaling operations. The full resolution
+pipe is also capable of outputting RAW data, bypassing much of the ISP's
+processing. The downscale pipe cannot output RAW data. An integrated test
+pattern generator can be used to drive the ISP and produce image data in the
+absence of a connected camera sensor. The driver module is named mali_c55, and
+is enabled through the CONFIG_VIDEO_MALI_C55 config option.
+
+The driver implements V4L2, Media Controller and V4L2 Subdevice interfaces and
+expects camera sensors connected to the ISP to have V4L2 subdevice interfaces.
+
+Mali-C55 ISP hardware
+=====================
+
+A high level functional view of the Mali-C55 ISP is presented below. The ISP
+takes input from either a live source or through a DMA engine for memory input,
+depending on the SoC integration.::
+
+ +---------+ +----------+ +--------+
+ | Sensor |--->| CSI-2 Rx | "Full Resolution" | DMA |
+ +---------+ +----------+ |\ Output +--->| Writer |
+ | | \ | +--------+
+ | | \ +----------+ +------+---> Streaming I/O
+ +------------+ +------->| | | | |
+ | | | |-->| Mali-C55 |--+
+ | DMA Reader |--------------->| | | ISP | |
+ | | | / | | | +---> Streaming I/O
+ +------------+ | / +----------+ | |
+ |/ +------+
+ | +--------+
+ +--->| DMA |
+ "Downscaled" | Writer |
+ Output +--------+
+
+Media Controller Topology
+=========================
+
+An example of the ISP's topology (as implemented in a system with an IMX415
+camera sensor and generic CSI-2 receiver) is below:
+
+
+.. kernel-figure:: mali-c55-graph.dot
+ :alt: mali-c55-graph.dot
+ :align: center
+
+The driver has 4 V4L2 subdevices:
+
+- `mali_c55 isp`: Responsible for configuring input crop and color space
+ conversion
+- `mali_c55 tpg`: The test pattern generator, emulating a camera sensor.
+- `mali_c55 resizer fr`: The Full-Resolution pipe resizer
+- `mali_c55 resizer ds`: The Downscale pipe resizer
+
+The driver has 3 V4L2 video devices:
+
+- `mali-c55 fr`: The full-resolution pipe's capture device
+- `mali-c55 ds`: The downscale pipe's capture device
+- `mali-c55 3a stats`: The 3A statistics capture device
+
+Frame sequences are synchronised across to two capture devices, meaning if one
+pipe is started later than the other the sequence numbers returned in its
+buffers will match those of the other pipe rather than starting from zero.
+
+Idiosyncrasies
+--------------
+
+**mali-c55 isp**
+The `mali-c55 isp` subdevice has a single sink pad to which all sources of data
+should be connected. The active source is selected by enabling the appropriate
+media link and disabling all others. The ISP has two source pads, reflecting the
+different paths through which it can internally route data. Tap points within
+the ISP allow users to divert data to avoid processing by some or all of the
+hardware's processing steps. The diagram below is intended only to highlight how
+the bypassing works and is not a true reflection of those processing steps; for
+a high-level functional block diagram see ARM's developer page for the
+ISP [3]_::
+
+ +--------------------------------------------------------------+
+ | Possible Internal ISP Data Routes |
+ | +------------+ +----------+ +------------+ |
+ +---+ | | | | | Colour | +---+
+ | 0 |--+-->| Processing |->| Demosaic |->| Space |--->| 1 |
+ +---+ | | | | | | Conversion | +---+
+ | | +------------+ +----------+ +------------+ |
+ | | +---+
+ | +---------------------------------------------------| 2 |
+ | +---+
+ | |
+ +--------------------------------------------------------------+
+
+
+.. flat-table::
+ :header-rows: 1
+
+ * - Pad
+ - Direction
+ - Purpose
+
+ * - 0
+ - sink
+ - Data input, connected to the TPG and camera sensors
+
+ * - 1
+ - source
+ - RGB/YUV data, connected to the FR and DS V4L2 subdevices
+
+ * - 2
+ - source
+ - RAW bayer data, connected to the FR V4L2 subdevices
+
+The ISP is limited to both input and output resolutions between 640x480 and
+8192x8192, and this is reflected in the ISP and resizer subdevice's .set_fmt()
+operations.
+
+**mali-c55 resizer fr**
+The `mali-c55 resizer fr` subdevice has two _sink_ pads to reflect the different
+insertion points in the hardware (either RAW or demosaiced data):
+
+.. flat-table::
+ :header-rows: 1
+
+ * - Pad
+ - Direction
+ - Purpose
+
+ * - 0
+ - sink
+ - Data input connected to the ISP's demosaiced stream.
+
+ * - 1
+ - source
+ - Data output connected to the capture video device
+
+ * - 2
+ - sink
+ - Data input connected to the ISP's raw data stream
+
+The data source in use is selected through the routing API; two routes each of a
+single stream are available:
+
+.. flat-table::
+ :header-rows: 1
+
+ * - Sink Pad
+ - Source Pad
+ - Purpose
+
+ * - 0
+ - 1
+ - Demosaiced data route
+
+ * - 2
+ - 1
+ - Raw data route
+
+
+If the demosaiced route is active then the FR pipe is only capable of output
+in RGB/YUV formats. If the raw route is active then the output reflects the
+input (which may be either Bayer or RGB/YUV data).
+
+Using the driver to capture video
+=================================
+
+Using the media controller APIs we can configure the input source and ISP to
+capture images in a variety of formats. In the examples below, configuring the
+media graph is done with the v4l-utils [1]_ package's media-ctl utility.
+Capturing the images is done with yavta [2]_.
+
+Configuring the input source
+----------------------------
+
+The first step is to set the input source that we wish by enabling the correct
+media link. Using the example topology above, we can select the TPG as follows:
+
+.. code-block:: none
+
+ media-ctl -l "'lte-csi2-rx':1->'mali-c55 isp':0[0]"
+ media-ctl -l "'mali-c55 tpg':0->'mali-c55 isp':0[1]"
+
+Configuring which video devices will stream data
+------------------------------------------------
+
+The driver will wait for all video devices to have their VIDIOC_STREAMON ioctl
+called before it tells the sensor to start streaming. To facilitate this we need
+to enable links to the video devices that we want to use. In the example below
+we enable the links to both of the image capture video devices
+
+.. code-block:: none
+
+ media-ctl -l "'mali-c55 resizer fr':1->'mali-c55 fr':0[1]"
+ media-ctl -l "'mali-c55 resizer ds':1->'mali-c55 ds':0[1]"
+
+Capturing bayer data from the source and processing to RGB/YUV
+--------------------------------------------------------------
+
+To capture 1920x1080 bayer data from the source and push it through the ISP's
+full processing pipeline, we configure the data formats appropriately on the
+source, ISP and resizer subdevices and set the FR resizer's routing to select
+processed data. The media bus format on the resizer's source pad will be either
+RGB121212_1X36 or YUV10_1X30, depending on whether you want to capture RGB or
+YUV. The ISP's debayering block outputs RGB data natively, setting the source
+pad format to YUV10_1X30 enables the colour space conversion block.
+
+In this example we target RGB565 output, so select RGB121212_1X36 as the resizer
+source pad's format:
+
+.. code-block:: none
+
+ # Set formats on the TPG and ISP
+ media-ctl -V "'mali-c55 tpg':0[fmt:SRGGB20_1X20/1920x1080]"
+ media-ctl -V "'mali-c55 isp':0[fmt:SRGGB20_1X20/1920x1080]"
+ media-ctl -V "'mali-c55 isp':1[fmt:SRGGB20_1X20/1920x1080]"
+
+ # Set routing on the FR resizer
+ media-ctl -R "'mali-c55 resizer fr'[0/0->1/0[1],2/0->1/0[0]]"
+
+ # Set format on the resizer, must be done AFTER the routing.
+ media-ctl -V "'mali-c55 resizer fr':1[fmt:RGB121212_1X36/1920x1080]"
+
+The downscale output can also be used to stream data at the same time. In this
+case since only processed data can be captured through the downscale output no
+routing need be set:
+
+.. code-block:: none
+
+ # Set format on the resizer
+ media-ctl -V "'mali-c55 resizer ds':1[fmt:RGB121212_1X36/1920x1080]"
+
+Following which images can be captured from both the FR and DS output's video
+devices (simultaneously, if desired):
+
+.. code-block:: none
+
+ yavta -f RGB565 -s 1920x1080 -c10 /dev/video0
+ yavta -f RGB565 -s 1920x1080 -c10 /dev/video1
+
+Cropping the image
+~~~~~~~~~~~~~~~~~~
+
+Both the full resolution and downscale pipes can crop to a minimum resolution of
+640x480. To crop the image simply configure the resizer's sink pad's crop and
+compose rectangles and set the format on the video device:
+
+.. code-block:: none
+
+ media-ctl -V "'mali-c55 resizer fr':0[fmt:RGB121212_1X36/1920x1080 crop:(480,270)/640x480 compose:(0,0)/640x480]"
+ media-ctl -V "'mali-c55 resizer fr':1[fmt:RGB121212_1X36/640x480]"
+ yavta -f RGB565 -s 640x480 -c10 /dev/video0
+
+Downscaling the image
+~~~~~~~~~~~~~~~~~~~~~
+
+Both the full resolution and downscale pipes can downscale the image by up to 8x
+provided the minimum 640x480 output resolution is adhered to. For the best image
+result the scaling ratio for each direction should be the same. To configure
+scaling we use the compose rectangle on the resizer's sink pad:
+
+.. code-block:: none
+
+ media-ctl -V "'mali-c55 resizer fr':0[fmt:RGB121212_1X36/1920x1080 crop:(0,0)/1920x1080 compose:(0,0)/640x480]"
+ media-ctl -V "'mali-c55 resizer fr':1[fmt:RGB121212_1X36/640x480]"
+ yavta -f RGB565 -s 640x480 -c10 /dev/video0
+
+Capturing images in YUV formats
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+If we need to output YUV data rather than RGB the color space conversion block
+needs to be active, which is achieved by setting MEDIA_BUS_FMT_YUV10_1X30 on the
+resizer's source pad. We can then configure a capture format like NV12 (here in
+its multi-planar variant)
+
+.. code-block:: none
+
+ media-ctl -V "'mali-c55 resizer fr':1[fmt:YUV10_1X30/1920x1080]"
+ yavta -f NV12M -s 1920x1080 -c10 /dev/video0
+
+Capturing RGB data from the source and processing it with the resizers
+----------------------------------------------------------------------
+
+The Mali-C55 ISP can work with sensors capable of outputting RGB data. In this
+case although none of the image quality blocks would be used it can still
+crop/scale the data in the usual way. For this reason RGB data input to the ISP
+still goes through the ISP subdevice's pad 1 to the resizer.
+
+To achieve this, the ISP's sink pad's format is set to
+MEDIA_BUS_FMT_RGB202020_1X60 - this reflects the format that data must be in to
+work with the ISP. Converting the camera sensor's output to that format is the
+responsibility of external hardware.
+
+In this example we ask the test pattern generator to give us RGB data instead of
+bayer.
+
+.. code-block:: none
+
+ media-ctl -V "'mali-c55 tpg':0[fmt:RGB202020_1X60/1920x1080]"
+ media-ctl -V "'mali-c55 isp':0[fmt:RGB202020_1X60/1920x1080]"
+
+Cropping or scaling the data can be done in exactly the same way as outlined
+earlier.
+
+Capturing raw data from the source and outputting it unmodified
+-----------------------------------------------------------------
+
+The ISP can additionally capture raw data from the source and output it on the
+full resolution pipe only, completely unmodified. In this case the downscale
+pipe can still process the data normally and be used at the same time.
+
+To configure raw bypass the FR resizer's subdevice's routing table needs to be
+configured, followed by formats in the appropriate places:
+
+.. code-block:: none
+
+ media-ctl -R "'mali-c55 resizer fr'[0/0->1/0[0],2/0->1/0[1]]"
+ media-ctl -V "'mali-c55 isp':0[fmt:RGB202020_1X60/1920x1080]"
+ media-ctl -V "'mali-c55 resizer fr':2[fmt:RGB202020_1X60/1920x1080]"
+ media-ctl -V "'mali-c55 resizer fr':1[fmt:RGB202020_1X60/1920x1080]"
+
+ # Set format on the video device and stream
+ yavta -f RGB565 -s 1920x1080 -c10 /dev/video0
+
+.. _mali-c55-3a-stats:
+
+Capturing ISP Statistics
+========================
+
+The ISP is capable of producing statistics for consumption by image processing
+algorithms running in userspace. These statistics can be captured by queueing
+buffers to the `mali-c55 3a stats` V4L2 Device whilst the ISP is streaming. Only
+the :ref:`V4L2_META_FMT_MALI_C55_STATS <v4l2-meta-fmt-mali-c55-stats>`
+format is supported, so no format-setting need be done:
+
+.. code-block:: none
+
+ # We assume the media graph has been configured to support RGB565 capture
+ # from the mali-c55 fr V4L2 Device, which is at /dev/video0. The statistics
+ # V4L2 device is at /dev/video3
+
+ yavta -f RGB565 -s 1920x1080 -c32 /dev/video0 && \
+ yavta -c10 -F /dev/video3
+
+The layout of the buffer is described by :c:type:`mali_c55_stats_buffer`,
+but broadly statistics are generated to support three image processing
+algorithms; AEXP (Auto-Exposure), AWB (Auto-White Balance) and AF (Auto-Focus).
+These stats can be drawn from various places in the Mali C55 ISP pipeline, known
+as "tap points". This high-level block diagram is intended to explain where in
+the processing flow the statistics can be drawn from::
+
+ +--> AEXP-2 +----> AEXP-1 +--> AF-0
+ | +----> AF-1 |
+ | | |
+ +---------+ | +--------------+ | +--------------+ |
+ | Input +-+-->+ Digital Gain +---+-->+ Black Level +---+---+
+ +---------+ +--------------+ +--------------+ |
+ +-----------------------------------------------------------------+
+ |
+ | +--------------+ +---------+ +----------------+
+ +-->| Sinter Noise +-+ White +--+--->| Lens Shading +--+---------------+
+ | Reduction | | Balance | | | | | |
+ +--------------+ +---------+ | +----------------+ | |
+ +---> AEXP-0 (A) +--> AEXP-0 (B) |
+ +--------------------------------------------------------------------------+
+ |
+ | +----------------+ +--------------+ +----------------+
+ +-->| Tone mapping +-+--->| Demosaicing +->+ Purple Fringe +-+-----------+
+ | | | +--------------+ | Correction | | |
+ +----------------+ +-> AEXP-IRIDIX +----------------+ +---> AWB-0 |
+ +----------------------------------------------------------------------------+
+ | +-------------+ +-------------+
+ +------------------->| Colour +---+--->| Output |
+ | Correction | | | Pipelines |
+ +-------------+ | +-------------+
+ +--> AWB-1
+
+By default all statistics are drawn from the 0th tap point for each algorithm;
+I.E. AEXP statistics from AEXP-0 (A), AWB statistics from AWB-0 and AF
+statistics from AF-0. This is configurable for AEXP and AWB statsistics through
+programming the ISP's parameters.
+
+.. _mali-c55-3a-params:
+
+Programming ISP Parameters
+==========================
+
+The ISP can be programmed with various parameters from userspace to apply to the
+hardware before and during video stream. This allows userspace to dynamically
+change values such as black level, white balance and lens shading gains and so
+on.
+
+The buffer format and how to populate it are described by the
+:ref:`V4L2_META_FMT_MALI_C55_PARAMS <v4l2-meta-fmt-mali-c55-params>` format,
+which should be set as the data format for the `mali-c55 3a params` video node.
+
+References
+==========
+.. [1] https://git.linuxtv.org/v4l-utils.git/
+.. [2] https://git.ideasonboard.org/yavta.git
+.. [3] https://developer.arm.com/Processors/Mali-C55
diff --git a/Documentation/admin-guide/media/mgb4.rst b/Documentation/admin-guide/media/mgb4.rst
index 2977f74d7e26..5ac69b833a7a 100644
--- a/Documentation/admin-guide/media/mgb4.rst
+++ b/Documentation/admin-guide/media/mgb4.rst
@@ -1,8 +1,19 @@
.. SPDX-License-Identifier: GPL-2.0
-====================
-mgb4 sysfs interface
-====================
+.. include:: <isonum.txt>
+
+The mgb4 driver
+===============
+
+Copyright |copy| 2023 - 2025 Digiteq Automotive
+ author: Martin Tůma <martin.tuma@digiteqautomotive.com>
+
+This is a v4l2 device driver for the Digiteq Automotive FrameGrabber 4, a PCIe
+card capable of capturing and generating FPD-Link III and GMSL2/3 video streams
+as used in the automotive industry.
+
+sysfs interface
+---------------
The mgb4 driver provides a sysfs interface, that is used to configure video
stream related parameters (some of them must be set properly before the v4l2
@@ -12,16 +23,17 @@ There are two types of parameters - global / PCI card related, found under
``/sys/class/video4linux/videoX/device`` and module specific found under
``/sys/class/video4linux/videoX``.
-
Global (PCI card) parameters
-============================
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
**module_type** (R):
Module type.
| 0 - No module present
| 1 - FPDL3
- | 2 - GMSL
+ | 2 - GMSL (one serializer, two daisy chained deserializers)
+ | 3 - GMSL (one serializer, two deserializers)
+ | 4 - GMSL (two deserializers with two daisy chain outputs)
**module_version** (R):
Module version number. Zero in case of a missing module.
@@ -42,9 +54,8 @@ Global (PCI card) parameters
where each component is a 8b number.
-
Common FPDL3/GMSL input parameters
-==================================
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
**input_id** (R):
Input number ID, zero based.
@@ -190,9 +201,8 @@ Common FPDL3/GMSL input parameters
*Note: This parameter can not be changed while the input v4l2 device is
open.*
-
Common FPDL3/GMSL output parameters
-===================================
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
**output_id** (R):
Output number ID, zero based.
@@ -228,8 +238,13 @@ Common FPDL3/GMSL output parameters
open.*
**frame_rate** (RW):
- Output video frame rate in frames per second. The default frame rate is
- 60Hz.
+ Output video signal frame rate limit in frames per second. Due to
+ the limited output pixel clock steps, the card can not always generate
+ a frame rate perfectly matching the value required by the connected display.
+ Using this parameter one can limit the frame rate by "crippling" the signal
+ so that the lines are not equal (the porches of the last line differ) but
+ the signal appears like having the exact frame rate to the connected display.
+ The default frame rate limit is 60Hz.
**hsync_polarity** (RW):
HSYNC signal polarity.
@@ -254,37 +269,36 @@ Common FPDL3/GMSL output parameters
and there is a non-linear stepping between two consecutive allowed
frequencies. The driver finds the nearest allowed frequency to the given
value and sets it. When reading this property, you get the exact
- frequency set by the driver. The default frequency is 70000kHz.
+ frequency set by the driver. The default frequency is 61150kHz.
*Note: This parameter can not be changed while the output v4l2 device is
open.*
**hsync_width** (RW):
- Width of the HSYNC signal in pixels. The default value is 16.
+ Width of the HSYNC signal in pixels. The default value is 40.
**vsync_width** (RW):
- Width of the VSYNC signal in video lines. The default value is 2.
+ Width of the VSYNC signal in video lines. The default value is 20.
**hback_porch** (RW):
Number of PCLK pulses between deassertion of the HSYNC signal and the first
- valid pixel in the video line (marked by DE=1). The default value is 32.
+ valid pixel in the video line (marked by DE=1). The default value is 50.
**hfront_porch** (RW):
Number of PCLK pulses between the end of the last valid pixel in the video
line (marked by DE=1) and assertion of the HSYNC signal. The default value
- is 32.
+ is 50.
**vback_porch** (RW):
Number of video lines between deassertion of the VSYNC signal and the video
- line with the first valid pixel (marked by DE=1). The default value is 2.
+ line with the first valid pixel (marked by DE=1). The default value is 31.
**vfront_porch** (RW):
Number of video lines between the end of the last valid pixel line (marked
- by DE=1) and assertion of the VSYNC signal. The default value is 2.
-
+ by DE=1) and assertion of the VSYNC signal. The default value is 30.
FPDL3 specific input parameters
-===============================
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
**fpdl3_input_width** (RW):
Number of deserializer input lines.
@@ -294,7 +308,7 @@ FPDL3 specific input parameters
| 2 - dual
FPDL3 specific output parameters
-================================
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
**fpdl3_output_width** (RW):
Number of serializer output lines.
@@ -304,7 +318,7 @@ FPDL3 specific output parameters
| 2 - dual
GMSL specific input parameters
-==============================
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
**gmsl_mode** (RW):
GMSL speed mode.
@@ -328,10 +342,8 @@ GMSL specific input parameters
| 0 - disabled
| 1 - enabled (default)
-
-====================
-mgb4 mtd partitions
-====================
+MTD partitions
+--------------
The mgb4 driver creates a MTD device with two partitions:
- mgb4-fw.X - FPGA firmware.
@@ -344,9 +356,8 @@ also have a third partition named *mgb4-flash* available in the system. This
partition represents the whole, unpartitioned, card's FLASH memory and one should
not fiddle with it...
-====================
-mgb4 iio (triggers)
-====================
+IIO (triggers)
+--------------
The mgb4 driver creates an Industrial I/O (IIO) device that provides trigger and
signal level status capability. The following scan elements are available:
diff --git a/Documentation/admin-guide/media/omap4_camera.rst b/Documentation/admin-guide/media/omap4_camera.rst
deleted file mode 100644
index 2ada9b1e6897..000000000000
--- a/Documentation/admin-guide/media/omap4_camera.rst
+++ /dev/null
@@ -1,62 +0,0 @@
-.. SPDX-License-Identifier: GPL-2.0
-
-OMAP4 ISS Driver
-================
-
-Author: Sergio Aguirre <sergio.a.aguirre@gmail.com>
-
-Copyright (C) 2012, Texas Instruments
-
-Introduction
-------------
-
-The OMAP44XX family of chips contains the Imaging SubSystem (a.k.a. ISS),
-Which contains several components that can be categorized in 3 big groups:
-
-- Interfaces (2 Interfaces: CSI2-A & CSI2-B/CCP2)
-- ISP (Image Signal Processor)
-- SIMCOP (Still Image Coprocessor)
-
-For more information, please look in [#f1]_ for latest version of:
-"OMAP4430 Multimedia Device Silicon Revision 2.x"
-
-As of Revision AB, the ISS is described in detail in section 8.
-
-This driver is supporting **only** the CSI2-A/B interfaces for now.
-
-It makes use of the Media Controller framework [#f2]_, and inherited most of the
-code from OMAP3 ISP driver (found under drivers/media/platform/ti/omap3isp/\*),
-except that it doesn't need an IOMMU now for ISS buffers memory mapping.
-
-Supports usage of MMAP buffers only (for now).
-
-Tested platforms
-----------------
-
-- OMAP4430SDP, w/ ES2.1 GP & SEVM4430-CAM-V1-0 (Contains IMX060 & OV5640, in
- which only the last one is supported, outputting YUV422 frames).
-
-- TI Blaze MDP, w/ OMAP4430 ES2.2 EMU (Contains 1 IMX060 & 2 OV5650 sensors, in
- which only the OV5650 are supported, outputting RAW10 frames).
-
-- PandaBoard, Rev. A2, w/ OMAP4430 ES2.1 GP & OV adapter board, tested with
- following sensors:
- * OV5640
- * OV5650
-
-- Tested on mainline kernel:
-
- http://git.kernel.org/?p=linux/kernel/git/torvalds/linux.git;a=summary
-
- Tag: v3.3 (commit c16fa4f2ad19908a47c63d8fa436a1178438c7e7)
-
-File list
----------
-drivers/staging/media/omap4iss/
-include/linux/platform_data/media/omap4iss.h
-
-References
-----------
-
-.. [#f1] http://focus.ti.com/general/docs/wtbu/wtbudocumentcenter.tsp?navigationId=12037&templateId=6123#62
-.. [#f2] http://lwn.net/Articles/420485/
diff --git a/Documentation/admin-guide/media/pci-cardlist.rst b/Documentation/admin-guide/media/pci-cardlist.rst
index 7d8e3c8987db..239879634ea5 100644
--- a/Documentation/admin-guide/media/pci-cardlist.rst
+++ b/Documentation/admin-guide/media/pci-cardlist.rst
@@ -86,7 +86,6 @@ saa7134 Philips SAA7134
saa7164 NXP SAA7164
smipcie SMI PCIe DVBSky cards
solo6x10 Bluecherry / Softlogic 6x10 capture cards (MPEG-4/H.264)
-sta2x11_vip STA2X11 VIP Video For Linux
tw5864 Techwell TW5864 video/audio grabber and encoder
tw686x Intersil/Techwell TW686x
tw68 Techwell tw68x Video For Linux
diff --git a/Documentation/admin-guide/media/platform-cardlist.rst b/Documentation/admin-guide/media/platform-cardlist.rst
index 1230ae4037ad..63f4b19c3628 100644
--- a/Documentation/admin-guide/media/platform-cardlist.rst
+++ b/Documentation/admin-guide/media/platform-cardlist.rst
@@ -18,8 +18,6 @@ am437x-vpfe TI AM437x VPFE
aspeed-video Aspeed AST2400 and AST2500
atmel-isc ATMEL Image Sensor Controller (ISC)
atmel-isi ATMEL Image Sensor Interface (ISI)
-c8sectpfe SDR platform devices
-c8sectpfe SDR platform devices
cafe_ccic Marvell 88ALP01 (Cafe) CMOS Camera Controller
cdns-csi2rx Cadence MIPI-CSI2 RX Controller
cdns-csi2tx Cadence MIPI-CSI2 TX Controller
diff --git a/Documentation/admin-guide/media/radio-cardlist.rst b/Documentation/admin-guide/media/radio-cardlist.rst
index a82a146bf912..cec724256812 100644
--- a/Documentation/admin-guide/media/radio-cardlist.rst
+++ b/Documentation/admin-guide/media/radio-cardlist.rst
@@ -30,7 +30,6 @@ radio-terratec TerraTec ActiveRadio ISA Standalone
radio-timb Enable the Timberdale radio driver
radio-trust Trust FM radio card
radio-typhoon Typhoon Radio (a.k.a. EcoRadio)
-radio-wl1273 Texas Instruments WL1273 I2C FM Radio
fm_drv ISA radio devices
fm_drv ISA radio devices
radio-zoltrix Zoltrix Radio
diff --git a/Documentation/admin-guide/media/raspberrypi-pisp-be.dot b/Documentation/admin-guide/media/raspberrypi-pisp-be.dot
new file mode 100644
index 000000000000..55671dc1d443
--- /dev/null
+++ b/Documentation/admin-guide/media/raspberrypi-pisp-be.dot
@@ -0,0 +1,20 @@
+digraph board {
+ rankdir=TB
+ n00000001 [label="{{<port0> 0 | <port1> 1 | <port2> 2 | <port7> 7} | pispbe\n | {<port3> 3 | <port4> 4 | <port5> 5 | <port6> 6}}", shape=Mrecord, style=filled, fillcolor=green]
+ n00000001:port3 -> n0000001c [style=bold]
+ n00000001:port4 -> n00000022 [style=bold]
+ n00000001:port5 -> n00000028 [style=bold]
+ n00000001:port6 -> n0000002e [style=bold]
+ n0000000a [label="pispbe-input\n/dev/video0", shape=box, style=filled, fillcolor=yellow]
+ n0000000a -> n00000001:port0 [style=bold]
+ n00000010 [label="pispbe-tdn_input\n/dev/video1", shape=box, style=filled, fillcolor=yellow]
+ n00000010 -> n00000001:port1 [style=bold]
+ n00000016 [label="pispbe-stitch_input\n/dev/video2", shape=box, style=filled, fillcolor=yellow]
+ n00000016 -> n00000001:port2 [style=bold]
+ n0000001c [label="pispbe-output0\n/dev/video3", shape=box, style=filled, fillcolor=yellow]
+ n00000022 [label="pispbe-output1\n/dev/video4", shape=box, style=filled, fillcolor=yellow]
+ n00000028 [label="pispbe-tdn_output\n/dev/video5", shape=box, style=filled, fillcolor=yellow]
+ n0000002e [label="pispbe-stitch_output\n/dev/video6", shape=box, style=filled, fillcolor=yellow]
+ n00000034 [label="pispbe-config\n/dev/video7", shape=box, style=filled, fillcolor=yellow]
+ n00000034 -> n00000001:port7 [style=bold]
+}
diff --git a/Documentation/admin-guide/media/raspberrypi-pisp-be.rst b/Documentation/admin-guide/media/raspberrypi-pisp-be.rst
new file mode 100644
index 000000000000..0fcf46f26276
--- /dev/null
+++ b/Documentation/admin-guide/media/raspberrypi-pisp-be.rst
@@ -0,0 +1,109 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=========================================================
+Raspberry Pi PiSP Back End Memory-to-Memory ISP (pisp-be)
+=========================================================
+
+The PiSP Back End
+=================
+
+The PiSP Back End is a memory-to-memory Image Signal Processor (ISP) which reads
+image data from DRAM memory and performs image processing as specified by the
+application through the parameters in a configuration buffer, before writing
+pixel data back to memory through two distinct output channels.
+
+The ISP registers and programming model are documented in the `Raspberry Pi
+Image Signal Processor (PiSP) Specification document`_
+
+The PiSP Back End ISP processes images in tiles. The handling of image
+tessellation and the computation of low-level configuration parameters is
+realized by a free software library called `libpisp
+<https://github.com/raspberrypi/libpisp>`_.
+
+The full image processing pipeline, which involves capturing RAW Bayer data from
+an image sensor through a MIPI CSI-2 compatible capture interface, storing them
+in DRAM memory and processing them in the PiSP Back End to obtain images usable
+by an application is implemented in `libcamera <https://libcamera.org>`_ as
+part of the Raspberry Pi platform support.
+
+The pisp-be driver
+==================
+
+The Raspberry Pi PiSP Back End (pisp-be) driver is located under
+drivers/media/platform/raspberrypi/pisp-be. It uses the `V4L2 API` to register
+a number of video capture and output devices, the `V4L2 subdev API` to register
+a subdevice for the ISP that connects the video devices in a single media graph
+realized using the `Media Controller (MC) API`.
+
+The media topology registered by the `pisp-be` driver is represented below:
+
+.. _pips-be-topology:
+
+.. kernel-figure:: raspberrypi-pisp-be.dot
+ :alt: Diagram of the default media pipeline topology
+ :align: center
+
+
+The media graph registers the following video device nodes:
+
+- pispbe-input: output device for images to be submitted to the ISP for
+ processing.
+- pispbe-tdn_input: output device for temporal denoise.
+- pispbe-stitch_input: output device for image stitching (HDR).
+- pispbe-output0: first capture device for processed images.
+- pispbe-output1: second capture device for processed images.
+- pispbe-tdn_output: capture device for temporal denoise.
+- pispbe-stitch_output: capture device for image stitching (HDR).
+- pispbe-config: output device for ISP configuration parameters.
+
+pispbe-input
+------------
+
+Images to be processed by the ISP are queued to the `pispbe-input` output device
+node. For a list of image formats supported as input to the ISP refer to the
+`Raspberry Pi Image Signal Processor (PiSP) Specification document`_.
+
+pispbe-tdn_input, pispbe-tdn_output
+-----------------------------------
+
+The `pispbe-tdn_input` output video device receives images to be processed by
+the temporal denoise block which are captured from the `pispbe-tdn_output`
+capture video device. Userspace is responsible for maintaining queues on both
+devices, and ensuring that buffers completed on the output are queued to the
+input.
+
+pispbe-stitch_input, pispbe-stitch_output
+-----------------------------------------
+
+To realize HDR (high dynamic range) image processing the image stitching and
+tonemapping blocks are used. The `pispbe-stitch_output` writes images to memory
+and the `pispbe-stitch_input` receives the previously written frame to process
+it along with the current input image. Userspace is responsible for maintaining
+queues on both devices, and ensuring that buffers completed on the output are
+queued to the input.
+
+pispbe-output0, pispbe-output1
+------------------------------
+
+The two capture devices write to memory the pixel data as processed by the ISP.
+
+pispbe-config
+-------------
+
+The `pispbe-config` output video devices receives a buffer of configuration
+parameters that define the desired image processing to be performed by the ISP.
+
+The format of the ISP configuration parameter is defined by
+:c:type:`pisp_be_tiles_config` C structure and the meaning of each parameter is
+described in the `Raspberry Pi Image Signal Processor (PiSP) Specification
+document`_.
+
+ISP configuration
+=================
+
+The ISP configuration is described solely by the content of the parameters
+buffer. The only parameter that userspace needs to configure using the V4L2 API
+is the image format on the output and capture video devices for validation of
+the content of the parameters buffer.
+
+.. _Raspberry Pi Image Signal Processor (PiSP) Specification document: https://datasheets.raspberrypi.com/camera/raspberry-pi-image-signal-processor-specification.pdf
diff --git a/Documentation/admin-guide/media/raspberrypi-rp1-cfe.dot b/Documentation/admin-guide/media/raspberrypi-rp1-cfe.dot
new file mode 100644
index 000000000000..7717f2291049
--- /dev/null
+++ b/Documentation/admin-guide/media/raspberrypi-rp1-cfe.dot
@@ -0,0 +1,27 @@
+digraph board {
+ rankdir=TB
+ n00000001 [label="{{<port0> 0} | csi2\n/dev/v4l-subdev0 | {<port1> 1 | <port2> 2 | <port3> 3 | <port4> 4}}", shape=Mrecord, style=filled, fillcolor=green]
+ n00000001:port1 -> n00000011 [style=dashed]
+ n00000001:port1 -> n00000007:port0
+ n00000001:port2 -> n00000015
+ n00000001:port2 -> n00000007:port0 [style=dashed]
+ n00000001:port3 -> n00000019 [style=dashed]
+ n00000001:port3 -> n00000007:port0 [style=dashed]
+ n00000001:port4 -> n0000001d [style=dashed]
+ n00000001:port4 -> n00000007:port0 [style=dashed]
+ n00000007 [label="{{<port0> 0 | <port1> 1} | pisp-fe\n/dev/v4l-subdev1 | {<port2> 2 | <port3> 3 | <port4> 4}}", shape=Mrecord, style=filled, fillcolor=green]
+ n00000007:port2 -> n00000021
+ n00000007:port3 -> n00000025 [style=dashed]
+ n00000007:port4 -> n00000029
+ n0000000d [label="{imx219 6-0010\n/dev/v4l-subdev2 | {<port0> 0}}", shape=Mrecord, style=filled, fillcolor=green]
+ n0000000d:port0 -> n00000001:port0 [style=bold]
+ n00000011 [label="rp1-cfe-csi2-ch0\n/dev/video0", shape=box, style=filled, fillcolor=yellow]
+ n00000015 [label="rp1-cfe-csi2-ch1\n/dev/video1", shape=box, style=filled, fillcolor=yellow]
+ n00000019 [label="rp1-cfe-csi2-ch2\n/dev/video2", shape=box, style=filled, fillcolor=yellow]
+ n0000001d [label="rp1-cfe-csi2-ch3\n/dev/video3", shape=box, style=filled, fillcolor=yellow]
+ n00000021 [label="rp1-cfe-fe-image0\n/dev/video4", shape=box, style=filled, fillcolor=yellow]
+ n00000025 [label="rp1-cfe-fe-image1\n/dev/video5", shape=box, style=filled, fillcolor=yellow]
+ n00000029 [label="rp1-cfe-fe-stats\n/dev/video6", shape=box, style=filled, fillcolor=yellow]
+ n0000002d [label="rp1-cfe-fe-config\n/dev/video7", shape=box, style=filled, fillcolor=yellow]
+ n0000002d -> n00000007:port1
+}
diff --git a/Documentation/admin-guide/media/raspberrypi-rp1-cfe.rst b/Documentation/admin-guide/media/raspberrypi-rp1-cfe.rst
new file mode 100644
index 000000000000..668d978a9875
--- /dev/null
+++ b/Documentation/admin-guide/media/raspberrypi-rp1-cfe.rst
@@ -0,0 +1,78 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============================================
+Raspberry Pi PiSP Camera Front End (rp1-cfe)
+============================================
+
+The PiSP Camera Front End
+=========================
+
+The PiSP Camera Front End (CFE) is a module which combines a CSI-2 receiver with
+a simple ISP, called the Front End (FE).
+
+The CFE has four DMA engines and can write frames from four separate streams
+received from the CSI-2 to the memory. One of those streams can also be routed
+directly to the FE, which can do minimal image processing, write two versions
+(e.g. non-scaled and downscaled versions) of the received frames to memory and
+provide statistics of the received frames.
+
+The FE registers are documented in the `Raspberry Pi Image Signal Processor
+(ISP) Specification document
+<https://datasheets.raspberrypi.com/camera/raspberry-pi-image-signal-processor-specification.pdf>`_,
+and example code for FE can be found in `libpisp
+<https://github.com/raspberrypi/libpisp>`_.
+
+The rp1-cfe driver
+==================
+
+The Raspberry Pi PiSP Camera Front End (rp1-cfe) driver is located under
+drivers/media/platform/raspberrypi/rp1-cfe. It uses the `V4L2 API` to register
+a number of video capture and output devices, the `V4L2 subdev API` to register
+subdevices for the CSI-2 received and the FE that connects the video devices in
+a single media graph realized using the `Media Controller (MC) API`.
+
+The media topology registered by the `rp1-cfe` driver, in this particular
+example connected to an imx219 sensor, is the following one:
+
+.. _rp1-cfe-topology:
+
+.. kernel-figure:: raspberrypi-rp1-cfe.dot
+ :alt: Diagram of an example media pipeline topology
+ :align: center
+
+The media graph contains the following video device nodes:
+
+- rp1-cfe-csi2-ch0: capture device for the first CSI-2 stream
+- rp1-cfe-csi2-ch1: capture device for the second CSI-2 stream
+- rp1-cfe-csi2-ch2: capture device for the third CSI-2 stream
+- rp1-cfe-csi2-ch3: capture device for the fourth CSI-2 stream
+- rp1-cfe-fe-image0: capture device for the first FE output
+- rp1-cfe-fe-image1: capture device for the second FE output
+- rp1-cfe-fe-stats: capture device for the FE statistics
+- rp1-cfe-fe-config: output device for FE configuration
+
+rp1-cfe-csi2-chX
+----------------
+
+The rp1-cfe-csi2-chX capture devices are normal V4L2 capture devices which
+can be used to capture video frames or metadata received from the CSI-2.
+
+rp1-cfe-fe-image0, rp1-cfe-fe-image1
+------------------------------------
+
+The rp1-cfe-fe-image0 and rp1-cfe-fe-image1 capture devices are used to write
+the processed frames to memory.
+
+rp1-cfe-fe-stats
+----------------
+
+The format of the FE statistics buffer is defined by
+:c:type:`pisp_statistics` C structure and the meaning of each parameter is
+described in the `PiSP specification` document.
+
+rp1-cfe-fe-config
+-----------------
+
+The format of the FE configuration buffer is defined by
+:c:type:`pisp_fe_config` C structure and the meaning of each parameter is
+described in the `PiSP specification` document.
diff --git a/Documentation/admin-guide/media/rkcif-rk3568-vicap.dot b/Documentation/admin-guide/media/rkcif-rk3568-vicap.dot
new file mode 100644
index 000000000000..3fac59335459
--- /dev/null
+++ b/Documentation/admin-guide/media/rkcif-rk3568-vicap.dot
@@ -0,0 +1,8 @@
+digraph board {
+ rankdir=TB
+ n00000001 [label="{{<port0> 0} | rkcif-dvp0\n/dev/v4l-subdev0 | {<port1> 1}}", shape=Mrecord, style=filled, fillcolor=green]
+ n00000001:port1 -> n00000004
+ n00000004 [label="rkcif-dvp0-id0\n/dev/video0", shape=box, style=filled, fillcolor=yellow]
+ n00000025 [label="{{} | it6801 2-0048\n/dev/v4l-subdev1 | {<port0> 0}}", shape=Mrecord, style=filled, fillcolor=green]
+ n00000025:port0 -> n00000001:port0
+}
diff --git a/Documentation/admin-guide/media/rkcif.rst b/Documentation/admin-guide/media/rkcif.rst
new file mode 100644
index 000000000000..2558c121abc4
--- /dev/null
+++ b/Documentation/admin-guide/media/rkcif.rst
@@ -0,0 +1,79 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=========================================
+Rockchip Camera Interface (CIF)
+=========================================
+
+Introduction
+============
+
+The Rockchip Camera Interface (CIF) is featured in many Rockchip SoCs in
+different variants.
+The different variants are combinations of common building blocks, such as
+
+* INTERFACE blocks of different types, namely
+
+ * the Digital Video Port (DVP, a parallel data interface)
+ * the interface block for the MIPI CSI-2 receiver
+
+* CROP units
+
+* MIPI CSI-2 receiver (not available on all variants): This unit is referred
+ to as MIPI CSI HOST in the Rockchip documentation.
+ Technically, it is a separate hardware block, but it is strongly coupled to
+ the CIF and therefore included here.
+
+* MUX units (not available on all variants) that pass the video data to an
+ image signal processor (ISP)
+
+* SCALE units (not available on all variants)
+
+* DMA engines that transfer video data into system memory using a
+ double-buffering mechanism called ping-pong mode
+
+* Support for four streams per INTERFACE block (not available on all
+ variants), e.g., for MIPI CSI-2 Virtual Channels (VCs)
+
+This document describes the different variants of the CIF, their hardware
+layout, as well as their representation in the media controller centric rkcif
+device driver, which is located under drivers/media/platform/rockchip/rkcif.
+
+Variants
+========
+
+Rockchip PX30 Video Input Processor (VIP)
+-----------------------------------------
+
+The PX30 Video Input Processor (VIP) features a digital video port that accepts
+parallel video data or BT.656.
+Since these protocols do not feature multiple streams, the VIP has one DMA
+engine that transfers the input video data into system memory.
+
+The rkcif driver represents this hardware variant by exposing one V4L2 subdevice
+(the DVP INTERFACE/CROP block) and one V4L2 device (the DVP DMA engine).
+
+Rockchip RK3568 Video Capture (VICAP)
+-------------------------------------
+
+The RK3568 Video Capture (VICAP) unit features a digital video port and a MIPI
+CSI-2 receiver that can receive video data independently.
+The DVP accepts parallel video data, BT.656 and BT.1120.
+Since the BT.1120 protocol may feature more than one stream, the RK3568 VICAP
+DVP features four DMA engines that can capture different streams.
+Similarly, the RK3568 VICAP MIPI CSI-2 receiver features four DMA engines to
+handle different Virtual Channels (VCs).
+
+The rkcif driver represents this hardware variant by exposing up the following
+V4L2 subdevices:
+
+* rkcif-dvp0: INTERFACE/CROP block for the DVP
+
+and the following video devices:
+
+* rkcif-dvp0-id0: The support for multiple streams on the DVP is not yet
+ implemented, as it is hard to find test hardware. Thus, this video device
+ represents the first DMA engine of the RK3568 DVP.
+
+.. kernel-figure:: rkcif-rk3568-vicap.dot
+ :alt: Topology of the RK3568 Video Capture (VICAP) unit
+ :align: center
diff --git a/Documentation/admin-guide/media/rkisp1.rst b/Documentation/admin-guide/media/rkisp1.rst
index 6f14d9561fa5..6c878c71442f 100644
--- a/Documentation/admin-guide/media/rkisp1.rst
+++ b/Documentation/admin-guide/media/rkisp1.rst
@@ -114,11 +114,18 @@ to be applied to the hardware during a video stream, allowing userspace
to dynamically modify values such as black level, cross talk corrections
and others.
-The buffer format is defined by struct :c:type:`rkisp1_params_cfg`, and
-userspace should set
+The ISP driver supports two different parameters configuration methods, the
+`fixed parameters format` or the `extensible parameters format`.
+
+When using the `fixed parameters` method the buffer format is defined by struct
+:c:type:`rkisp1_params_cfg`, and userspace should set
:ref:`V4L2_META_FMT_RK_ISP1_PARAMS <v4l2-meta-fmt-rk-isp1-params>` as the
dataformat.
+When using the `extensible parameters` method the buffer format is defined by
+struct :c:type:`rkisp1_ext_params_cfg`, and userspace should set
+:ref:`V4L2_META_FMT_RK_ISP1_EXT_PARAMS <v4l2-meta-fmt-rk-isp1-ext-params>` as
+the dataformat.
Capturing Video Frames Example
==============================
diff --git a/Documentation/admin-guide/media/saa7134.rst b/Documentation/admin-guide/media/saa7134.rst
index 51eae7eb5ab7..18d7cbc897db 100644
--- a/Documentation/admin-guide/media/saa7134.rst
+++ b/Documentation/admin-guide/media/saa7134.rst
@@ -67,7 +67,7 @@ Changes / Fixes
Please mail to linux-media AT vger.kernel.org unified diffs against
the linux media git tree:
- https://git.linuxtv.org/media_tree.git/
+ https://git.linuxtv.org/media.git/
This is done by committing a patch at a clone of the git tree and
submitting the patch using ``git send-email``. Don't forget to
diff --git a/Documentation/admin-guide/media/si4713.rst b/Documentation/admin-guide/media/si4713.rst
index be8e6b49b7b4..85dcf1cd2df8 100644
--- a/Documentation/admin-guide/media/si4713.rst
+++ b/Documentation/admin-guide/media/si4713.rst
@@ -13,7 +13,7 @@ Contact: Eduardo Valentin <eduardo.valentin@nokia.com>
Information about the Device
----------------------------
-This chip is a Silicon Labs product. It is a I2C device, currently on 0x63 address.
+This chip is a Silicon Labs product. It is an I2C device, currently on 0x63 address.
Basically, it has transmission and signal noise level measurement features.
The Si4713 integrates transmit functions for FM broadcast stereo transmission.
@@ -28,7 +28,7 @@ Users must comply with local regulations on radio frequency (RF) transmission.
Device driver description
-------------------------
-There are two modules to handle this device. One is a I2C device driver
+There are two modules to handle this device. One is an I2C device driver
and the other is a platform driver.
The I2C device driver exports a v4l2-subdev interface to the kernel.
@@ -113,7 +113,7 @@ Here is a summary of them:
- acomp_attack_time - Sets the attack time for audio dynamic range control.
- acomp_release_time - Sets the release time for audio dynamic range control.
-* Limiter setups audio deviation limiter feature. Once a over deviation occurs,
+* Limiter sets up the audio deviation limiter feature. Once an over deviation occurs,
it is possible to adjust the front-end gain of the audio input and always
prevent over deviation.
diff --git a/Documentation/admin-guide/media/tuner-cardlist.rst b/Documentation/admin-guide/media/tuner-cardlist.rst
index 362617c59c5d..65ecf48ddf24 100644
--- a/Documentation/admin-guide/media/tuner-cardlist.rst
+++ b/Documentation/admin-guide/media/tuner-cardlist.rst
@@ -97,4 +97,6 @@ Tuner number Card name
89 Sony BTF-PG472Z PAL/SECAM
90 Sony BTF-PK467Z NTSC-M-JP
91 Sony BTF-PB463Z NTSC-M
+92 Silicon Labs Si2157 tuner
+93 Tena TNF931D-DFDR1
============ =====================================================
diff --git a/Documentation/admin-guide/media/v4l-drivers.rst b/Documentation/admin-guide/media/v4l-drivers.rst
index f4bb2605f07e..393f83e8dc4d 100644
--- a/Documentation/admin-guide/media/v4l-drivers.rst
+++ b/Documentation/admin-guide/media/v4l-drivers.rst
@@ -10,20 +10,25 @@ Video4Linux (V4L) driver-specific documentation
:maxdepth: 2
bttv
+ c3-isp
cafe_ccic
cx88
fimc
imx
imx7
ipu3
+ ipu6-isys
ivtv
+ mali-c55
mgb4
omap3isp
- omap4_camera
philips
qcom_camss
+ raspberrypi-pisp-be
rcar-fdp1
+ rkcif
rkisp1
+ raspberrypi-rp1-cfe
saa7134
si470x
si4713
diff --git a/Documentation/admin-guide/media/visl.rst b/Documentation/admin-guide/media/visl.rst
index db1ef29438e1..cd45145cde68 100644
--- a/Documentation/admin-guide/media/visl.rst
+++ b/Documentation/admin-guide/media/visl.rst
@@ -49,6 +49,10 @@ Module parameters
visl_dprintk_frame_start, visl_dprintk_nframes, but controls the dumping of
buffer data through debugfs instead.
+- tpg_verbose: Write extra information on each output frame to ease debugging
+ the API. When set to true, the output frames are not stable for a given input
+ as some information like pointers or queue status will be added to them.
+
What is the default use case for this driver?
---------------------------------------------
@@ -57,8 +61,12 @@ This assumes that a working client is run against visl and that the ftrace and
OUTPUT buffer data is subsequently used to debug a work-in-progress
implementation.
-Information on reference frames, their timestamps, the status of the OUTPUT and
-CAPTURE queues and more can be read directly from the CAPTURE buffers.
+Even though no video decoding is actually done, the output frames can be used
+against a reference for a given input, except if tpg_verbose is set to true.
+
+Depending on the tpg_verbose parameter value, information on reference frames,
+their timestamps, the status of the OUTPUT and CAPTURE queues and more can be
+read directly from the CAPTURE buffers.
Supported codecs
----------------
diff --git a/Documentation/admin-guide/media/vivid.rst b/Documentation/admin-guide/media/vivid.rst
index 58ac25b2c385..034ca7c77fb9 100644
--- a/Documentation/admin-guide/media/vivid.rst
+++ b/Documentation/admin-guide/media/vivid.rst
@@ -60,7 +60,7 @@ all configurable using the following module options:
- node_types:
which devices should each driver instance create. An array of
- hexadecimal values, one for each instance. The default is 0x1d3d.
+ hexadecimal values, one for each instance. The default is 0xe1d3d.
Each value is a bitmask with the following meaning:
- bit 0: Video Capture node
@@ -302,6 +302,15 @@ all configurable using the following module options:
- 0: forbid hints
- 1: allow hints
+- supports_requests:
+
+ specifies if the device should support the Request API. There are
+ three possible values, default is 1:
+
+ - 0: no request
+ - 1: supports requests
+ - 2: requires requests
+
Taken together, all these module options allow you to precisely customize
the driver behavior and test your application with all sorts of permutations.
It is also very suitable to emulate hardware that is not yet available, e.g.
@@ -313,13 +322,13 @@ Video Capture
This is probably the most frequently used feature. The video capture device
can be configured by using the module options num_inputs, input_types and
-ccs_cap_mode (see section 1 for more detailed information), but by default
-four inputs are configured: a webcam, a TV tuner, an S-Video and an HDMI
-input, one input for each input type. Those are described in more detail
-below.
+ccs_cap_mode (see "Configuring the driver" for more detailed information),
+but by default four inputs are configured: a webcam, a TV tuner, an S-Video
+and an HDMI input, one input for each input type. Those are described in more
+detail below.
Special attention has been given to the rate at which new frames become
-available. The jitter will be around 1 jiffie (that depends on the HZ
+available. The jitter will be around 1 jiffy (that depends on the HZ
configuration of your kernel, so usually 1/100, 1/250 or 1/1000 of a second),
but the long-term behavior is exactly following the framerate. So a
framerate of 59.94 Hz is really different from 60 Hz. If the framerate
@@ -434,10 +443,10 @@ Video Output
------------
The video output device can be configured by using the module options
-num_outputs, output_types and ccs_out_mode (see section 1 for more detailed
-information), but by default two outputs are configured: an S-Video and an
-HDMI input, one output for each output type. Those are described in more detail
-below.
+num_outputs, output_types and ccs_out_mode (see "Configuring the driver"
+for more detailed information), but by default two outputs are configured:
+an S-Video and an HDMI input, one output for each output type. Those are
+described in more detail below.
Like with video capture the framerate is also exact in the long term.
@@ -1011,11 +1020,6 @@ Digital Video Controls
affects the reported colorspace since DVI_D outputs will always use
sRGB.
-- Display Present:
-
- sets the presence of a "display" on the HDMI output. This affects
- the tx_edid_present, tx_hotplug and tx_rxsense controls.
-
FM Radio Receiver Controls
~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -1130,35 +1134,34 @@ Metadata Capture Controls
if set, then the generated metadata stream contains Source Clock information.
-Video, VBI and RDS Looping
---------------------------
-The vivid driver supports looping of video output to video input, VBI output
-to VBI input and RDS output to RDS input. For video/VBI looping this emulates
-as if a cable was hooked up between the output and input connector. So video
-and VBI looping is only supported between S-Video and HDMI inputs and outputs.
-VBI is only valid for S-Video as it makes no sense for HDMI.
+Video, Sliced VBI and HDMI CEC Looping
+--------------------------------------
-Since radio is wireless this looping always happens if the radio receiver
-frequency is close to the radio transmitter frequency. In that case the radio
-transmitter will 'override' the emulated radio stations.
-
-Looping is currently supported only between devices created by the same
-vivid driver instance.
+Video Looping functionality is supported for devices created by the same
+vivid driver instance, as well as across multiple instances of the vivid driver.
+The vivid driver supports looping of video and Sliced VBI data between an S-Video output
+and an S-Video input. It also supports looping of video and HDMI CEC data between an
+HDMI output and an HDMI input.
+To enable looping, set the 'HDMI/S-Video XXX-N Is Connected To' control(s) to select
+whether an input uses the Test Pattern Generator, or is disconnected, or is connected
+to an output. An input can be connected to an output from any vivid instance.
+The inputs and outputs are numbered XXX-N where XXX is the vivid instance number
+(see module option n_devs). If there is only one vivid instance (the default), then
+XXX will be 000. And N is the Nth S-Video/HDMI input or output of that instance.
+If vivid is loaded without module options, then you can connect the S-Video 000-0 input
+to the S-Video 000-0 output, or the HDMI 000-0 input to the HDMI 000-0 output.
+This is the equivalent of connecting or disconnecting a cable between an input and an
+output in a physical device.
-Video and Sliced VBI looping
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+If an 'HDMI/S-Video XXX-N Is Connected To' control selected an output, then the video
+output will be looped to the video input provided that:
-The way to enable video/VBI looping is currently fairly crude. A 'Loop Video'
-control is available in the "Vivid" control class of the video
-capture and VBI capture devices. When checked the video looping will be enabled.
-Once enabled any video S-Video or HDMI input will show a static test pattern
-until the video output has started. At that time the video output will be
-looped to the video input provided that:
+- the currently selected input matches the input indicated by the control name.
-- the input type matches the output type. So the HDMI input cannot receive
- video from the S-Video output.
+- in the vivid instance of the output connector, the currently selected output matches
+ the output indicated by the control's value.
- the video resolution of the video input must match that of the video output.
So it is not possible to loop a 50 Hz (720x576) S-Video output to a 60 Hz
@@ -1185,6 +1188,8 @@ looped to the video input provided that:
"DV Timings Signal Mode" for the HDMI input should be configured so that a
valid signal is passed to the video input.
+If any condition is not valid, then the 'Noise' test pattern is shown.
+
The framerates do not have to match, although this might change in the future.
By default you will see the OSD text superimposed on top of the looped video.
@@ -1198,17 +1203,26 @@ and WSS (50 Hz formats) VBI data is looped. Teletext VBI data is not looped.
Radio & RDS Looping
-~~~~~~~~~~~~~~~~~~~
-
-As mentioned in section 6 the radio receiver emulates stations are regular
-frequency intervals. Depending on the frequency of the radio receiver a
-signal strength value is calculated (this is returned by VIDIOC_G_TUNER).
-However, it will also look at the frequency set by the radio transmitter and
-if that results in a higher signal strength than the settings of the radio
-transmitter will be used as if it was a valid station. This also includes
-the RDS data (if any) that the transmitter 'transmits'. This is received
-faithfully on the receiver side. Note that when the driver is loaded the
-frequencies of the radio receiver and transmitter are not identical, so
+-------------------
+
+The vivid driver supports looping of RDS output to RDS input.
+
+Since radio is wireless this looping always happens if the radio receiver
+frequency is close to the radio transmitter frequency. In that case the radio
+transmitter will 'override' the emulated radio stations.
+
+RDS looping is currently supported only between devices created by the same
+vivid driver instance.
+
+As mentioned in the "Radio Receiver" section, the radio receiver emulates
+stations at regular frequency intervals. Depending on the frequency of the
+radio receiver a signal strength value is calculated (this is returned by
+VIDIOC_G_TUNER). However, it will also look at the frequency set by the radio
+transmitter and if that results in a higher signal strength than the settings
+of the radio transmitter will be used as if it was a valid station. This also
+includes the RDS data (if any) that the transmitter 'transmits'. This is
+received faithfully on the receiver side. Note that when the driver is loaded
+the frequencies of the radio receiver and transmitter are not identical, so
initially no looping takes place.
@@ -1218,8 +1232,8 @@ Cropping, Composing, Scaling
This driver supports cropping, composing and scaling in any combination. Normally
which features are supported can be selected through the Vivid controls,
but it is also possible to hardcode it when the module is loaded through the
-ccs_cap_mode and ccs_out_mode module options. See section 1 on the details of
-these module options.
+ccs_cap_mode and ccs_out_mode module options. See "Configuring the driver" on
+the details of these module options.
This allows you to test your application for all these variations.
@@ -1260,7 +1274,8 @@ is set, then the alpha component is only used for the color red and set to
The driver has to be configured to support the multiplanar formats. By default
the driver instances are single-planar. This can be changed by setting the
-multiplanar module option, see section 1 for more details on that option.
+multiplanar module option, see "Configuring the driver" for more details on that
+option.
If the driver instance is using the multiplanar formats/API, then the first
single planar format (YUYV) and the multiplanar NV16M and NV61M formats the
@@ -1270,74 +1285,6 @@ data_offset to be non-zero, so this is a useful feature for testing applications
Video output will also honor any data_offset that the application set.
-Capture Overlay
----------------
-
-Note: capture overlay support is implemented primarily to test the existing
-V4L2 capture overlay API. In practice few if any GPUs support such overlays
-anymore, and neither are they generally needed anymore since modern hardware
-is so much more capable. By setting flag 0x10000 in the node_types module
-option the vivid driver will create a simple framebuffer device that can be
-used for testing this API. Whether this API should be used for new drivers is
-questionable.
-
-This driver has support for a destructive capture overlay with bitmap clipping
-and list clipping (up to 16 rectangles) capabilities. Overlays are not
-supported for multiplanar formats. It also honors the struct v4l2_window field
-setting: if it is set to FIELD_TOP or FIELD_BOTTOM and the capture setting is
-FIELD_ALTERNATE, then only the top or bottom fields will be copied to the overlay.
-
-The overlay only works if you are also capturing at that same time. This is a
-vivid limitation since it copies from a buffer to the overlay instead of
-filling the overlay directly. And if you are not capturing, then no buffers
-are available to fill.
-
-In addition, the pixelformat of the capture format and that of the framebuffer
-must be the same for the overlay to work. Otherwise VIDIOC_OVERLAY will return
-an error.
-
-In order to really see what it going on you will need to create two vivid
-instances: the first with a framebuffer enabled. You configure the capture
-overlay of the second instance to use the framebuffer of the first, then
-you start capturing in the second instance. For the first instance you setup
-the output overlay for the video output, turn on video looping and capture
-to see the blended framebuffer overlay that's being written to by the second
-instance. This setup would require the following commands:
-
-.. code-block:: none
-
- $ sudo modprobe vivid n_devs=2 node_types=0x10101,0x1
- $ v4l2-ctl -d1 --find-fb
- /dev/fb1 is the framebuffer associated with base address 0x12800000
- $ sudo v4l2-ctl -d2 --set-fbuf fb=1
- $ v4l2-ctl -d1 --set-fbuf fb=1
- $ v4l2-ctl -d0 --set-fmt-video=pixelformat='AR15'
- $ v4l2-ctl -d1 --set-fmt-video-out=pixelformat='AR15'
- $ v4l2-ctl -d2 --set-fmt-video=pixelformat='AR15'
- $ v4l2-ctl -d0 -i2
- $ v4l2-ctl -d2 -i2
- $ v4l2-ctl -d2 -c horizontal_movement=4
- $ v4l2-ctl -d1 --overlay=1
- $ v4l2-ctl -d0 -c loop_video=1
- $ v4l2-ctl -d2 --stream-mmap --overlay=1
-
-And from another console:
-
-.. code-block:: none
-
- $ v4l2-ctl -d1 --stream-out-mmap
-
-And yet another console:
-
-.. code-block:: none
-
- $ qv4l2
-
-and start streaming.
-
-As you can see, this is not for the faint of heart...
-
-
Output Overlay
--------------
@@ -1396,7 +1343,7 @@ Some Future Improvements
Just as a reminder and in no particular order:
- Add a virtual alsa driver to test audio
-- Add virtual sub-devices and media controller support
+- Add virtual sub-devices
- Some support for testing compressed video
- Add support to loop raw VBI output to raw VBI input
- Add support to loop teletext sliced VBI output to VBI input
@@ -1405,12 +1352,10 @@ Just as a reminder and in no particular order:
- Add ARGB888 overlay support: better testing of the alpha channel
- Improve pixel aspect support in the tpg code by passing a real v4l2_fract
- Use per-queue locks and/or per-device locks to improve throughput
-- Add support to loop from a specific output to a specific input across
- vivid instances
- The SDR radio should use the same 'frequencies' for stations as the normal
radio receiver, and give back noise if the frequency doesn't match up with
a station frequency
- Make a thread for the RDS generation, that would help in particular for the
"Controls" RDS Rx I/O Mode as the read-only RDS controls could be updated
in real-time.
-- Changing the EDID should cause hotplug detect emulation to happen.
+- Changing the EDID doesn't wait 100 ms before setting the HPD signal.
diff --git a/Documentation/admin-guide/mm/cma_debugfs.rst b/Documentation/admin-guide/mm/cma_debugfs.rst
index 7367e6294ef6..4120e9cb0cd5 100644
--- a/Documentation/admin-guide/mm/cma_debugfs.rst
+++ b/Documentation/admin-guide/mm/cma_debugfs.rst
@@ -12,10 +12,16 @@ its CMA name like below:
The structure of the files created under that directory is as follows:
- - [RO] base_pfn: The base PFN (Page Frame Number) of the zone.
+ - [RO] base_pfn: The base PFN (Page Frame Number) of the CMA area.
+ This is the same as ranges/0/base_pfn.
- [RO] count: Amount of memory in the CMA area.
- [RO] order_per_bit: Order of pages represented by one bit.
- - [RO] bitmap: The bitmap of page states in the zone.
+ - [RO] bitmap: The bitmap of allocated pages in the area.
+ This is the same as ranges/0/base_pfn.
+ - [RO] ranges/N/base_pfn: The base PFN of contiguous range N
+ in the CMA area.
+ - [RO] ranges/N/bitmap: The bit map of allocated pages in
+ range N in the CMA area.
- [WO] alloc: Allocate N pages from that CMA area. For example::
echo 5 > <debugfs>/cma/<cma_name>/alloc
diff --git a/Documentation/admin-guide/mm/damon/index.rst b/Documentation/admin-guide/mm/damon/index.rst
index 33d37bb2fb4e..3ce3164480c7 100644
--- a/Documentation/admin-guide/mm/damon/index.rst
+++ b/Documentation/admin-guide/mm/damon/index.rst
@@ -1,12 +1,11 @@
.. SPDX-License-Identifier: GPL-2.0
-==========================
-DAMON: Data Access MONitor
-==========================
+================================================================
+DAMON: Data Access MONitoring and Access-aware System Operations
+================================================================
-:doc:`DAMON </mm/damon/index>` allows light-weight data access monitoring.
-Using DAMON, users can analyze the memory access patterns of their systems and
-optimize those.
+:doc:`DAMON </mm/damon/index>` is a Linux kernel subsystem for efficient data
+access monitoring and access-aware system operations.
.. toctree::
:maxdepth: 2
@@ -15,3 +14,4 @@ optimize those.
usage
reclaim
lru_sort
+ stat
diff --git a/Documentation/admin-guide/mm/damon/lru_sort.rst b/Documentation/admin-guide/mm/damon/lru_sort.rst
index 7b0775d281b4..72a943202676 100644
--- a/Documentation/admin-guide/mm/damon/lru_sort.rst
+++ b/Documentation/admin-guide/mm/damon/lru_sort.rst
@@ -211,6 +211,28 @@ End of target memory region in physical address.
The end physical address of memory region that DAMON_LRU_SORT will do work
against. By default, biggest System RAM is used as the region.
+addr_unit
+---------
+
+A scale factor for memory addresses and bytes.
+
+This parameter is for setting and getting the :ref:`address unit
+<damon_design_addr_unit>` parameter of the DAMON instance for DAMON_RECLAIM.
+
+``monitor_region_start`` and ``monitor_region_end`` should be provided in this
+unit. For example, let's suppose ``addr_unit``, ``monitor_region_start`` and
+``monitor_region_end`` are set as ``1024``, ``0`` and ``10``, respectively.
+Then DAMON_LRU_SORT will work for 10 KiB length of physical address range that
+starts from address zero (``[0 * 1024, 10 * 1024)`` in bytes).
+
+Stat parameters having ``bytes_`` prefix are also in this unit. For example,
+let's suppose values of ``addr_unit``, ``bytes_lru_sort_tried_hot_regions`` and
+``bytes_lru_sorted_hot_regions`` are ``1024``, ``42``, and ``32``,
+respectively. Then it means DAMON_LRU_SORT tried to LRU-sort 42 KiB of hot
+memory and successfully LRU-sorted 32 KiB of the memory in total.
+
+If unsure, use only the default value (``1``) and forget about this.
+
kdamond_pid
-----------
diff --git a/Documentation/admin-guide/mm/damon/reclaim.rst b/Documentation/admin-guide/mm/damon/reclaim.rst
index 343e25b252f4..8eba3da8dcee 100644
--- a/Documentation/admin-guide/mm/damon/reclaim.rst
+++ b/Documentation/admin-guide/mm/damon/reclaim.rst
@@ -117,6 +117,33 @@ milliseconds.
1 second by default.
+quota_mem_pressure_us
+---------------------
+
+Desired level of memory pressure-stall time in microseconds.
+
+While keeping the caps that set by other quotas, DAMON_RECLAIM automatically
+increases and decreases the effective level of the quota aiming this level of
+memory pressure is incurred. System-wide ``some`` memory PSI in microseconds
+per quota reset interval (``quota_reset_interval_ms``) is collected and
+compared to this value to see if the aim is satisfied. Value zero means
+disabling this auto-tuning feature.
+
+Disabled by default.
+
+quota_autotune_feedback
+-----------------------
+
+User-specifiable feedback for auto-tuning of the effective quota.
+
+While keeping the caps that set by other quotas, DAMON_RECLAIM automatically
+increases and decreases the effective level of the quota aiming receiving this
+feedback of value ``10,000`` from the user. DAMON_RECLAIM assumes the feedback
+value and the quota are positively proportional. Value zero means disabling
+this auto-tuning feature.
+
+Disabled by default.
+
wmarks_interval
---------------
@@ -205,6 +232,28 @@ The end physical address of memory region that DAMON_RECLAIM will do work
against. That is, DAMON_RECLAIM will find cold memory regions in this region
and reclaims. By default, biggest System RAM is used as the region.
+addr_unit
+---------
+
+A scale factor for memory addresses and bytes.
+
+This parameter is for setting and getting the :ref:`address unit
+<damon_design_addr_unit>` parameter of the DAMON instance for DAMON_RECLAIM.
+
+``monitor_region_start`` and ``monitor_region_end`` should be provided in this
+unit. For example, let's suppose ``addr_unit``, ``monitor_region_start`` and
+``monitor_region_end`` are set as ``1024``, ``0`` and ``10``, respectively.
+Then DAMON_RECLAIM will work for 10 KiB length of physical address range that
+starts from address zero (``[0 * 1024, 10 * 1024)`` in bytes).
+
+``bytes_reclaim_tried_regions`` and ``bytes_reclaimed_regions`` are also in
+this unit. For example, let's suppose values of ``addr_unit``,
+``bytes_reclaim_tried_regions`` and ``bytes_reclaimed_regions`` are ``1024``,
+``42``, and ``32``, respectively. Then it means DAMON_RECLAIM tried to reclaim
+42 KiB memory and successfully reclaimed 32 KiB memory in total.
+
+If unsure, use only the default value (``1``) and forget about this.
+
skip_anon
---------
diff --git a/Documentation/admin-guide/mm/damon/start.rst b/Documentation/admin-guide/mm/damon/start.rst
index 7aa0071ff1c3..ec8c34b2d32f 100644
--- a/Documentation/admin-guide/mm/damon/start.rst
+++ b/Documentation/admin-guide/mm/damon/start.rst
@@ -7,7 +7,7 @@ Getting Started
This document briefly describes how you can use DAMON by demonstrating its
default user space tool. Please note that this document describes only a part
of its features for brevity. Please refer to the usage `doc
-<https://github.com/awslabs/damo/blob/next/USAGE.md>`_ of the tool for more
+<https://github.com/damonitor/damo/blob/next/USAGE.md>`_ of the tool for more
details.
@@ -26,7 +26,7 @@ User Space Tool
For the demonstration, we will use the default user space tool for DAMON,
called DAMON Operator (DAMO). It is available at
-https://github.com/awslabs/damo. The examples below assume that ``damo`` is on
+https://github.com/damonitor/damo. The examples below assume that ``damo`` is on
your ``$PATH``. It's not mandatory, though.
Because DAMO is using the sysfs interface (refer to :doc:`usage` for the
@@ -34,18 +34,69 @@ detail) of DAMON, you should ensure :doc:`sysfs </filesystems/sysfs>` is
mounted.
+Snapshot Data Access Patterns
+=============================
+
+The commands below show the memory access pattern of a program at the moment of
+the execution. ::
+
+ $ git clone https://github.com/sjp38/masim; cd masim; make
+ $ sudo damo start "./masim ./configs/stairs.cfg --quiet"
+ $ sudo damo report access
+ heatmap: 641111111000000000000000000000000000000000000000000000[...]33333333333333335557984444[...]7
+ # min/max temperatures: -1,840,000,000, 370,010,000, column size: 3.925 MiB
+ 0 addr 86.182 TiB size 8.000 KiB access 0 % age 14.900 s
+ 1 addr 86.182 TiB size 8.000 KiB access 60 % age 0 ns
+ 2 addr 86.182 TiB size 3.422 MiB access 0 % age 4.100 s
+ 3 addr 86.182 TiB size 2.004 MiB access 95 % age 2.200 s
+ 4 addr 86.182 TiB size 29.688 MiB access 0 % age 14.100 s
+ 5 addr 86.182 TiB size 29.516 MiB access 0 % age 16.700 s
+ 6 addr 86.182 TiB size 29.633 MiB access 0 % age 17.900 s
+ 7 addr 86.182 TiB size 117.652 MiB access 0 % age 18.400 s
+ 8 addr 126.990 TiB size 62.332 MiB access 0 % age 9.500 s
+ 9 addr 126.990 TiB size 13.980 MiB access 0 % age 5.200 s
+ 10 addr 126.990 TiB size 9.539 MiB access 100 % age 3.700 s
+ 11 addr 126.990 TiB size 16.098 MiB access 0 % age 6.400 s
+ 12 addr 127.987 TiB size 132.000 KiB access 0 % age 2.900 s
+ total size: 314.008 MiB
+ $ sudo damo stop
+
+The first command of the above example downloads and builds an artificial
+memory access generator program called ``masim``. The second command asks DAMO
+to start the program via the given command and make DAMON monitors the newly
+started process. The third command retrieves the current snapshot of the
+monitored access pattern of the process from DAMON and shows the pattern in a
+human readable format.
+
+The first line of the output shows the relative access temperature (hotness) of
+the regions in a single row hetmap format. Each column on the heatmap
+represents regions of same size on the monitored virtual address space. The
+position of the colun on the row and the number on the column represents the
+relative location and access temperature of the region. ``[...]`` means
+unmapped huge regions on the virtual address spaces. The second line shows
+additional information for better understanding the heatmap.
+
+Each line of the output from the third line shows which virtual address range
+(``addr XX size XX``) of the process is how frequently (``access XX %``)
+accessed for how long time (``age XX``). For example, the evelenth region of
+~9.5 MiB size is being most frequently accessed for last 3.7 seconds. Finally,
+the fourth command stops DAMON.
+
+Note that DAMON can monitor not only virtual address spaces but multiple types
+of address spaces including the physical address space.
+
+
Recording Data Access Patterns
==============================
The commands below record the memory access patterns of a program and save the
monitoring results to a file. ::
- $ git clone https://github.com/sjp38/masim
- $ cd masim; make; ./masim ./configs/zigzag.cfg &
+ $ ./masim ./configs/zigzag.cfg &
$ sudo damo record -o damon.data $(pidof masim)
-The first two lines of the commands download an artificial memory access
-generator program and run it in the background. The generator will repeatedly
+The line of the commands run the artificial memory access
+generator program again. The generator will repeatedly
access two 100 MiB sized memory regions one by one. You can substitute this
with your real workload. The last line asks ``damo`` to record the access
pattern in the ``damon.data`` file.
@@ -57,7 +108,7 @@ Visualizing Recorded Patterns
You can visualize the pattern in a heatmap, showing which memory region
(x-axis) got accessed when (y-axis) and how frequently (number).::
- $ sudo damo report heats --heatmap stdout
+ $ sudo damo report heatmap
22222222222222222222222222222222222222211111111111111111111111111111111111111100
44444444444444444444444444444444444444434444444444444444444444444444444444443200
44444444444444444444444444444444444444433444444444444444444444444444444444444200
@@ -122,6 +173,6 @@ Data Access Pattern Aware Memory Management
Below command makes every memory region of size >=4K that has not accessed for
>=60 seconds in your workload to be swapped out. ::
- $ sudo damo schemes --damos_access_rate 0 0 --damos_sz_region 4K max \
- --damos_age 60s max --damos_action pageout \
- <pid of your workload>
+ $ sudo damo start --damos_access_rate 0 0 --damos_sz_region 4K max \
+ --damos_age 60s max --damos_action pageout \
+ --target_pid <pid of your workload>
diff --git a/Documentation/admin-guide/mm/damon/stat.rst b/Documentation/admin-guide/mm/damon/stat.rst
new file mode 100644
index 000000000000..e5a5a2c4f803
--- /dev/null
+++ b/Documentation/admin-guide/mm/damon/stat.rst
@@ -0,0 +1,86 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===================================
+Data Access Monitoring Results Stat
+===================================
+
+Data Access Monitoring Results Stat (DAMON_STAT) is a static kernel module that
+is aimed to be used for simple access pattern monitoring. It monitors accesses
+on the system's entire physical memory using DAMON, and provides simplified
+access monitoring results statistics, namely idle time percentiles and
+estimated memory bandwidth.
+
+.. _damon_stat_monitoring_accuracy_overhead:
+
+Monitoring Accuracy and Overhead
+================================
+
+DAMON_STAT uses monitoring intervals :ref:`auto-tuning
+<damon_design_monitoring_intervals_autotuning>` to make its accuracy high and
+overhead minimum. It auto-tunes the intervals aiming 4 % of observable access
+events to be captured in each snapshot, while limiting the resulting sampling
+interval to be 5 milliseconds in minimum and 10 seconds in maximum. On a few
+production server systems, it resulted in consuming only 0.x % single CPU time,
+while capturing reasonable quality of access patterns. The tuning-resulting
+intervals can be retrieved via ``aggr_interval_us`` :ref:`parameter
+<damon_stat_aggr_interval_us>`.
+
+Interface: Module Parameters
+============================
+
+To use this feature, you should first ensure your system is running on a kernel
+that is built with ``CONFIG_DAMON_STAT=y``. The feature can be enabled by
+default at build time, by setting ``CONFIG_DAMON_STAT_ENABLED_DEFAULT`` true.
+
+To let sysadmins enable or disable it at boot and/or runtime, and read the
+monitoring results, DAMON_STAT provides module parameters. Following
+sections are descriptions of the parameters.
+
+enabled
+-------
+
+Enable or disable DAMON_STAT.
+
+You can enable DAMON_STAT by setting the value of this parameter as ``Y``.
+Setting it as ``N`` disables DAMON_STAT. The default value is set by
+``CONFIG_DAMON_STAT_ENABLED_DEFAULT`` build config option.
+
+.. _damon_stat_aggr_interval_us:
+
+aggr_interval_us
+----------------
+
+Auto-tuned aggregation time interval in microseconds.
+
+Users can read the aggregation interval of DAMON that is being used by the
+DAMON instance for DAMON_STAT. It is :ref:`auto-tuned
+<damon_stat_monitoring_accuracy_overhead>` and therefore the value is
+dynamically changed.
+
+estimated_memory_bandwidth
+--------------------------
+
+Estimated memory bandwidth consumption (bytes per second) of the system.
+
+DAMON_STAT reads observed access events on the current DAMON results snapshot
+and converts it to memory bandwidth consumption estimation in bytes per second.
+The resulting metric is exposed to user via this read-only parameter. Because
+DAMON uses sampling, this is only an estimation of the access intensity rather
+than accurate memory bandwidth.
+
+memory_idle_ms_percentiles
+--------------------------
+
+Per-byte idle time (milliseconds) percentiles of the system.
+
+DAMON_STAT calculates how long each byte of the memory was not accessed until
+now (idle time), based on the current DAMON results snapshot. For regions
+having access frequency (nr_accesses) larger than zero, how long the current
+access frequency level was kept multiplied by ``-1`` becomes the idlee time of
+every byte of the region. If a region has zero access frequency (nr_accesses),
+how long the region was keeping the zero access frequency (age) becomes the
+idle time of every byte of the region. Then, DAMON_STAT exposes the
+percentiles of the idle time values via this read-only parameter. Reading the
+parameter returns 101 idle time values in milliseconds, separated by comma.
+Each value represents 0-th, 1st, 2nd, 3rd, ..., 99th and 100th percentile idle
+times.
diff --git a/Documentation/admin-guide/mm/damon/usage.rst b/Documentation/admin-guide/mm/damon/usage.rst
index 9d23144bf985..9991dad60fcf 100644
--- a/Documentation/admin-guide/mm/damon/usage.rst
+++ b/Documentation/admin-guide/mm/damon/usage.rst
@@ -7,31 +7,25 @@ Detailed Usages
DAMON provides below interfaces for different users.
- *DAMON user space tool.*
- `This <https://github.com/awslabs/damo>`_ is for privileged people such as
+ `This <https://github.com/damonitor/damo>`_ is for privileged people such as
system administrators who want a just-working human-friendly interface.
Using this, users can use the DAMON’s major features in a human-friendly way.
It may not be highly tuned for special cases, though. For more detail,
please refer to its `usage document
- <https://github.com/awslabs/damo/blob/next/USAGE.md>`_.
+ <https://github.com/damonitor/damo/blob/next/USAGE.md>`_.
- *sysfs interface.*
:ref:`This <sysfs_interface>` is for privileged user space programmers who
want more optimized use of DAMON. Using this, users can use DAMON’s major
features by reading from and writing to special sysfs files. Therefore,
you can write and use your personalized DAMON sysfs wrapper programs that
reads/writes the sysfs files instead of you. The `DAMON user space tool
- <https://github.com/awslabs/damo>`_ is one example of such programs.
+ <https://github.com/damonitor/damo>`_ is one example of such programs.
- *Kernel Space Programming Interface.*
:doc:`This </mm/damon/api>` is for kernel space programmers. Using this,
users can utilize every feature of DAMON most flexibly and efficiently by
writing kernel space DAMON application programs for you. You can even extend
DAMON for various address spaces. For detail, please refer to the interface
:doc:`document </mm/damon/api>`.
-- *debugfs interface. (DEPRECATED!)*
- :ref:`This <debugfs_interface>` is almost identical to :ref:`sysfs interface
- <sysfs_interface>`. This is deprecated, so users should move to the
- :ref:`sysfs interface <sysfs_interface>`. If you depend on this and cannot
- move, please report your usecase to damon@lists.linux.dev and
- linux-mm@kvack.org.
.. _sysfs_interface:
@@ -65,34 +59,37 @@ comma (",").
:ref:`/sys/kernel/mm/damon <sysfs_root>`/admin
│ :ref:`kdamonds <sysfs_kdamonds>`/nr_kdamonds
- │ │ :ref:`0 <sysfs_kdamond>`/state,pid
+ │ │ :ref:`0 <sysfs_kdamond>`/state,pid,refresh_ms
│ │ │ :ref:`contexts <sysfs_contexts>`/nr_contexts
- │ │ │ │ :ref:`0 <sysfs_context>`/avail_operations,operations
+ │ │ │ │ :ref:`0 <sysfs_context>`/avail_operations,operations,addr_unit
│ │ │ │ │ :ref:`monitoring_attrs <sysfs_monitoring_attrs>`/
│ │ │ │ │ │ intervals/sample_us,aggr_us,update_us
+ │ │ │ │ │ │ │ intervals_goal/access_bp,aggrs,min_sample_us,max_sample_us
│ │ │ │ │ │ nr_regions/min,max
│ │ │ │ │ :ref:`targets <sysfs_targets>`/nr_targets
- │ │ │ │ │ │ :ref:`0 <sysfs_target>`/pid_target
+ │ │ │ │ │ │ :ref:`0 <sysfs_target>`/pid_target,obsolete_target
│ │ │ │ │ │ │ :ref:`regions <sysfs_regions>`/nr_regions
│ │ │ │ │ │ │ │ :ref:`0 <sysfs_region>`/start,end
│ │ │ │ │ │ │ │ ...
│ │ │ │ │ │ ...
│ │ │ │ │ :ref:`schemes <sysfs_schemes>`/nr_schemes
- │ │ │ │ │ │ :ref:`0 <sysfs_scheme>`/action,apply_interval_us
+ │ │ │ │ │ │ :ref:`0 <sysfs_scheme>`/action,target_nid,apply_interval_us
│ │ │ │ │ │ │ :ref:`access_pattern <sysfs_access_pattern>`/
│ │ │ │ │ │ │ │ sz/min,max
│ │ │ │ │ │ │ │ nr_accesses/min,max
│ │ │ │ │ │ │ │ age/min,max
- │ │ │ │ │ │ │ :ref:`quotas <sysfs_quotas>`/ms,bytes,reset_interval_ms
+ │ │ │ │ │ │ │ :ref:`quotas <sysfs_quotas>`/ms,bytes,reset_interval_ms,effective_bytes
│ │ │ │ │ │ │ │ weights/sz_permil,nr_accesses_permil,age_permil
│ │ │ │ │ │ │ │ :ref:`goals <sysfs_schemes_quota_goals>`/nr_goals
- │ │ │ │ │ │ │ │ │ 0/target_value,current_value
+ │ │ │ │ │ │ │ │ │ 0/target_metric,target_value,current_value,nid,path
│ │ │ │ │ │ │ :ref:`watermarks <sysfs_watermarks>`/metric,interval_us,high,mid,low
- │ │ │ │ │ │ │ :ref:`filters <sysfs_filters>`/nr_filters
- │ │ │ │ │ │ │ │ 0/type,matching,memcg_id
- │ │ │ │ │ │ │ :ref:`stats <sysfs_schemes_stats>`/nr_tried,sz_tried,nr_applied,sz_applied,qt_exceeds
+ │ │ │ │ │ │ │ :ref:`{core_,ops_,}filters <sysfs_filters>`/nr_filters
+ │ │ │ │ │ │ │ │ 0/type,matching,allow,memcg_path,addr_start,addr_end,target_idx,min,max
+ │ │ │ │ │ │ │ :ref:`dests <damon_sysfs_dests>`/nr_dests
+ │ │ │ │ │ │ │ │ 0/id,weight
+ │ │ │ │ │ │ │ :ref:`stats <sysfs_schemes_stats>`/nr_tried,sz_tried,nr_applied,sz_applied,sz_ops_filter_passed,qt_exceeds
│ │ │ │ │ │ │ :ref:`tried_regions <sysfs_schemes_tried_regions>`/total_bytes
- │ │ │ │ │ │ │ │ 0/start,end,nr_accesses,age
+ │ │ │ │ │ │ │ │ 0/start,end,nr_accesses,age,sz_filter_passed
│ │ │ │ │ │ │ │ ...
│ │ │ │ │ │ ...
│ │ │ │ ...
@@ -126,8 +123,8 @@ kdamond.
kdamonds/<N>/
-------------
-In each kdamond directory, two files (``state`` and ``pid``) and one directory
-(``contexts``) exist.
+In each kdamond directory, three files (``state``, ``pid`` and ``refresh_ms``)
+and one directory (``contexts``) exist.
Reading ``state`` returns ``on`` if the kdamond is currently running, or
``off`` if it is not running.
@@ -137,7 +134,13 @@ Users can write below commands for the kdamond to the ``state`` file.
- ``on``: Start running.
- ``off``: Stop running.
- ``commit``: Read the user inputs in the sysfs files except ``state`` file
- again.
+ again. Monitoring :ref:`target region <sysfs_regions>` inputs are also be
+ ignored if no target region is specified.
+- ``update_tuned_intervals``: Update the contents of ``sample_us`` and
+ ``aggr_us`` files of the kdamond with the auto-tuning applied ``sampling
+ interval`` and ``aggregation interval`` for the files. Please refer to
+ :ref:`intervals_goal section <damon_usage_sysfs_monitoring_intervals_goal>`
+ for more details.
- ``commit_schemes_quota_goals``: Read the DAMON-based operation schemes'
:ref:`quota goals <sysfs_schemes_quota_goals>`.
- ``update_schemes_stats``: Update the contents of stats files for each
@@ -153,9 +156,19 @@ Users can write below commands for the kdamond to the ``state`` file.
- ``clear_schemes_tried_regions``: Clear the DAMON-based operating scheme
action tried regions directory for each DAMON-based operation scheme of the
kdamond.
+- ``update_schemes_effective_quotas``: Update the contents of
+ ``effective_bytes`` files for each DAMON-based operation scheme of the
+ kdamond. For more details, refer to :ref:`quotas directory <sysfs_quotas>`.
If the state is ``on``, reading ``pid`` shows the pid of the kdamond thread.
+Users can ask the kernel to periodically update files showing auto-tuned
+parameters and DAMOS stats instead of manually writing
+``update_tuned_intervals`` like keywords to ``state`` file. For this, users
+should write the desired update time interval in milliseconds to ``refresh_ms``
+file. If the interval is zero, the periodic update is disabled. Reading the
+file shows currently set time interval.
+
``contexts`` directory contains files for controlling the monitoring contexts
that this kdamond will execute.
@@ -176,28 +189,26 @@ details). At the moment, only one context per kdamond is supported, so only
contexts/<N>/
-------------
-In each context directory, two files (``avail_operations`` and ``operations``)
-and three directories (``monitoring_attrs``, ``targets``, and ``schemes``)
-exist.
+In each context directory, three files (``avail_operations``, ``operations``
+and ``addr_unit``) and three directories (``monitoring_attrs``, ``targets``,
+and ``schemes``) exist.
-DAMON supports multiple types of monitoring operations, including those for
-virtual address space and the physical address space. You can get the list of
-available monitoring operations set on the currently running kernel by reading
+DAMON supports multiple types of :ref:`monitoring operations
+<damon_design_configurable_operations_set>`, including those for virtual address
+space and the physical address space. You can get the list of available
+monitoring operations set on the currently running kernel by reading
``avail_operations`` file. Based on the kernel configuration, the file will
-list some or all of below keywords.
-
- - vaddr: Monitor virtual address spaces of specific processes
- - fvaddr: Monitor fixed virtual address ranges
- - paddr: Monitor the physical address space of the system
-
-Please refer to :ref:`regions sysfs directory <sysfs_regions>` for detailed
-differences between the operations sets in terms of the monitoring target
-regions.
+list different available operation sets. Please refer to the :ref:`design
+<damon_operations_set>` for the list of all available operation sets and their
+brief explanations.
You can set and get what type of monitoring operations DAMON will use for the
context by writing one of the keywords listed in ``avail_operations`` file and
reading from the ``operations`` file.
+``addr_unit`` file is for setting and getting the :ref:`address unit
+<damon_design_addr_unit>` parameter of the operations set.
+
.. _sysfs_monitoring_attrs:
contexts/<N>/monitoring_attrs/
@@ -221,6 +232,25 @@ writing to and rading from the files.
For more details about the intervals and monitoring regions range, please refer
to the Design document (:doc:`/mm/damon/design`).
+.. _damon_usage_sysfs_monitoring_intervals_goal:
+
+contexts/<N>/monitoring_attrs/intervals/intervals_goal/
+-------------------------------------------------------
+
+Under the ``intervals`` directory, one directory for automated tuning of
+``sample_us`` and ``aggr_us``, namely ``intervals_goal`` directory also exists.
+Under the directory, four files for the auto-tuning control, namely
+``access_bp``, ``aggrs``, ``min_sample_us`` and ``max_sample_us`` exist.
+Please refer to the :ref:`design document of the feature
+<damon_design_monitoring_intervals_autotuning>` for the internal of the tuning
+mechanism. Reading and writing the four files under ``intervals_goal``
+directory shows and updates the tuning parameters that described in the
+:ref:design doc <damon_design_monitoring_intervals_autotuning>` with the same
+names. The tuning starts with the user-set ``sample_us`` and ``aggr_us``. The
+tuning-applied current values of the two intervals can be read from the
+``sample_us`` and ``aggr_us`` files after writing ``update_tuned_intervals`` to
+the ``state`` file.
+
.. _sysfs_targets:
contexts/<N>/targets/
@@ -235,29 +265,30 @@ to ``N-1``. Each directory represents each monitoring target.
targets/<N>/
------------
-In each target directory, one file (``pid_target``) and one directory
-(``regions``) exist.
+In each target directory, two files (``pid_target`` and ``obsolete_target``)
+and one directory (``regions``) exist.
If you wrote ``vaddr`` to the ``contexts/<N>/operations``, each target should
be a process. You can specify the process to DAMON by writing the pid of the
process to the ``pid_target`` file.
+Users can selectively remove targets in the middle of the targets array by
+writing non-zero value to ``obsolete_target`` file and committing it (writing
+``commit`` to ``state`` file). DAMON will remove the matching targets from its
+internal targets array. Users are responsible to construct target directories
+again, so that those correctly represent the changed internal targets array.
+
+
.. _sysfs_regions:
targets/<N>/regions
-------------------
-When ``vaddr`` monitoring operations set is being used (``vaddr`` is written to
-the ``contexts/<N>/operations`` file), DAMON automatically sets and updates the
-monitoring target regions so that entire memory mappings of target processes
-can be covered. However, users could want to set the initial monitoring region
-to specific address ranges.
-
-In contrast, DAMON do not automatically sets and updates the monitoring target
-regions when ``fvaddr`` or ``paddr`` monitoring operations sets are being used
-(``fvaddr`` or ``paddr`` have written to the ``contexts/<N>/operations``).
-Therefore, users should set the monitoring target regions by themselves in the
-cases.
+In case of ``fvaddr`` or ``paddr`` monitoring operations sets, users are
+required to set the monitoring target address ranges. In case of ``vaddr``
+operations set, it is not mandatory, but users can optionally set the initial
+monitoring region to specific address ranges. Please refer to the :ref:`design
+<damon_design_vaddr_target_regions_construction>` for more details.
For such cases, users can explicitly set the initial monitoring target regions
as they want, by writing proper values to the files under this directory.
@@ -266,6 +297,11 @@ In the beginning, this directory has only one file, ``nr_regions``. Writing a
number (``N``) to the file creates the number of child directories named ``0``
to ``N-1``. Each directory represents each initial monitoring target region.
+If ``nr_regions`` is zero when committing new DAMON parameters online (writing
+``commit`` to ``state`` file of :ref:`kdamond <sysfs_kdamond>`), the commit
+logic ignores the target regions. In other words, the current monitoring
+results for the target are preserved.
+
.. _sysfs_region:
regions/<N>/
@@ -296,33 +332,19 @@ to ``N-1``. Each directory represents each DAMON-based operation scheme.
schemes/<N>/
------------
-In each scheme directory, five directories (``access_pattern``, ``quotas``,
-``watermarks``, ``filters``, ``stats``, and ``tried_regions``) and two files
-(``action`` and ``apply_interval``) exist.
+In each scheme directory, eight directories (``access_pattern``, ``quotas``,
+``watermarks``, ``core_filters``, ``ops_filters``, ``filters``, ``dests``,
+``stats``, and ``tried_regions``) and three files (``action``, ``target_nid``
+and ``apply_interval``) exist.
The ``action`` file is for setting and getting the scheme's :ref:`action
<damon_design_damos_action>`. The keywords that can be written to and read
-from the file and their meaning are as below.
-
-Note that support of each action depends on the running DAMON operations set
-:ref:`implementation <sysfs_context>`.
-
- - ``willneed``: Call ``madvise()`` for the region with ``MADV_WILLNEED``.
- Supported by ``vaddr`` and ``fvaddr`` operations set.
- - ``cold``: Call ``madvise()`` for the region with ``MADV_COLD``.
- Supported by ``vaddr`` and ``fvaddr`` operations set.
- - ``pageout``: Call ``madvise()`` for the region with ``MADV_PAGEOUT``.
- Supported by ``vaddr``, ``fvaddr`` and ``paddr`` operations set.
- - ``hugepage``: Call ``madvise()`` for the region with ``MADV_HUGEPAGE``.
- Supported by ``vaddr`` and ``fvaddr`` operations set.
- - ``nohugepage``: Call ``madvise()`` for the region with ``MADV_NOHUGEPAGE``.
- Supported by ``vaddr`` and ``fvaddr`` operations set.
- - ``lru_prio``: Prioritize the region on its LRU lists.
- Supported by ``paddr`` operations set.
- - ``lru_deprio``: Deprioritize the region on its LRU lists.
- Supported by ``paddr`` operations set.
- - ``stat``: Do nothing but count the statistics.
- Supported by all operations sets.
+from the file and their meaning are same to those of the list on
+:ref:`design doc <damon_design_damos_action>`.
+
+The ``target_nid`` file is for setting the migration target node, which is
+only meaningful when the ``action`` is either ``migrate_hot`` or
+``migrate_cold``.
The ``apply_interval_us`` file is for setting and getting the scheme's
:ref:`apply_interval <damon_design_damos>` in microseconds.
@@ -350,8 +372,9 @@ schemes/<N>/quotas/
The directory for the :ref:`quotas <damon_design_damos_quotas>` of the given
DAMON-based operation scheme.
-Under ``quotas`` directory, three files (``ms``, ``bytes``,
-``reset_interval_ms``) and two directores (``weights`` and ``goals``) exist.
+Under ``quotas`` directory, four files (``ms``, ``bytes``,
+``reset_interval_ms``, ``effective_bytes``) and two directories (``weights`` and
+``goals``) exist.
You can set the ``time quota`` in milliseconds, ``size quota`` in bytes, and
``reset interval`` in milliseconds by writing the values to the three files,
@@ -359,7 +382,17 @@ respectively. Then, DAMON tries to use only up to ``time quota`` milliseconds
for applying the ``action`` to memory regions of the ``access_pattern``, and to
apply the action to only up to ``bytes`` bytes of memory regions within the
``reset_interval_ms``. Setting both ``ms`` and ``bytes`` zero disables the
-quota limits.
+quota limits unless at least one :ref:`goal <sysfs_schemes_quota_goals>` is
+set.
+
+The time quota is internally transformed to a size quota. Between the
+transformed size quota and user-specified size quota, smaller one is applied.
+Based on the user-specified :ref:`goal <sysfs_schemes_quota_goals>`, the
+effective size quota is further adjusted. Reading ``effective_bytes`` returns
+the current effective size quota. The file is not updated in real time, so
+users should ask DAMON sysfs interface to update the content of the file for
+the stats by writing a special keyword, ``update_schemes_effective_quotas`` to
+the relevant ``kdamonds/<N>/state`` file.
Under ``weights`` directory, three files (``sz_permil``,
``nr_accesses_permil``, and ``age_permil``) exist.
@@ -382,11 +415,11 @@ number (``N``) to the file creates the number of child directories named ``0``
to ``N-1``. Each directory represents each goal and current achievement.
Among the multiple feedback, the best one is used.
-Each goal directory contains two files, namely ``target_value`` and
-``current_value``. Users can set and get any number to those files to set the
-feedback. User space main workload's latency or throughput, system metrics
-like free memory ratio or memory pressure stall time (PSI) could be example
-metrics for the values. Note that users should write
+Each goal directory contains five files, namely ``target_metric``,
+``target_value``, ``current_value`` ``nid`` and ``path``. Users can set and
+get the five parameters for the quota auto-tuning goals that specified on the
+:ref:`design doc <damon_design_damos_quotas_auto_tuning>` by writing to and
+reading from each of the files. Note that users should further write
``commit_schemes_quota_goals`` to the ``state`` file of the :ref:`kdamond
directory <sysfs_kdamond>` to pass the feedback to DAMON.
@@ -413,70 +446,107 @@ The ``interval`` should written in microseconds unit.
.. _sysfs_filters:
-schemes/<N>/filters/
---------------------
+schemes/<N>/{core\_,ops\_,}filters/
+-----------------------------------
-The directory for the :ref:`filters <damon_design_damos_filters>` of the given
+Directories for :ref:`filters <damon_design_damos_filters>` of the given
DAMON-based operation scheme.
-In the beginning, this directory has only one file, ``nr_filters``. Writing a
+``core_filters`` and ``ops_filters`` directories are for the filters handled by
+the DAMON core layer and operations set layer, respectively. ``filters``
+directory can be used for installing filters regardless of their handled
+layers. Filters that requested by ``core_filters`` and ``ops_filters`` will be
+installed before those of ``filters``. All three directories have same files.
+
+Use of ``filters`` directory can make expecting evaluation orders of given
+filters with the files under directory bit confusing. Users are hence
+recommended to use ``core_filters`` and ``ops_filters`` directories. The
+``filters`` directory could be deprecated in future.
+
+In the beginning, the directory has only one file, ``nr_filters``. Writing a
number (``N``) to the file creates the number of child directories named ``0``
to ``N-1``. Each directory represents each filter. The filters are evaluated
in the numeric order.
-Each filter directory contains six files, namely ``type``, ``matcing``,
-``memcg_path``, ``addr_start``, ``addr_end``, and ``target_idx``. To ``type``
-file, you can write one of four special keywords: ``anon`` for anonymous pages,
-``memcg`` for specific memory cgroup, ``addr`` for specific address range (an
-open-ended interval), or ``target`` for specific DAMON monitoring target
-filtering. In case of the memory cgroup filtering, you can specify the memory
-cgroup of the interest by writing the path of the memory cgroup from the
-cgroups mount point to ``memcg_path`` file. In case of the address range
-filtering, you can specify the start and end address of the range to
-``addr_start`` and ``addr_end`` files, respectively. For the DAMON monitoring
-target filtering, you can specify the index of the target between the list of
-the DAMON context's monitoring targets list to ``target_idx`` file. You can
-write ``Y`` or ``N`` to ``matching`` file to filter out pages that does or does
-not match to the type, respectively. Then, the scheme's action will not be
-applied to the pages that specified to be filtered out.
+Each filter directory contains nine files, namely ``type``, ``matching``,
+``allow``, ``memcg_path``, ``addr_start``, ``addr_end``, ``min``, ``max``
+and ``target_idx``. To ``type`` file, you can write the type of the filter.
+Refer to :ref:`the design doc <damon_design_damos_filters>` for available type
+names, their meaning and on what layer those are handled.
+
+For ``memcg`` type, you can specify the memory cgroup of the interest by
+writing the path of the memory cgroup from the cgroups mount point to
+``memcg_path`` file. For ``addr`` type, you can specify the start and end
+address of the range (open-ended interval) to ``addr_start`` and ``addr_end``
+files, respectively. For ``hugepage_size`` type, you can specify the minimum
+and maximum size of the range (closed interval) to ``min`` and ``max`` files,
+respectively. For ``target`` type, you can specify the index of the target
+between the list of the DAMON context's monitoring targets list to
+``target_idx`` file.
+
+You can write ``Y`` or ``N`` to ``matching`` file to specify whether the filter
+is for memory that matches the ``type``. You can write ``Y`` or ``N`` to
+``allow`` file to specify if applying the action to the memory that satisfies
+the ``type`` and ``matching`` should be allowed or not.
For example, below restricts a DAMOS action to be applied to only non-anonymous
pages of all memory cgroups except ``/having_care_already``.::
+ # cd ops_filters/0/
# echo 2 > nr_filters
- # # filter out anonymous pages
+ # # disallow anonymous pages
echo anon > 0/type
echo Y > 0/matching
+ echo N > 0/allow
# # further filter out all cgroups except one at '/having_care_already'
echo memcg > 1/type
echo /having_care_already > 1/memcg_path
- echo N > 1/matching
+ echo Y > 1/matching
+ echo N > 1/allow
+
+Refer to the :ref:`DAMOS filters design documentation
+<damon_design_damos_filters>` for more details including how multiple filters
+of different ``allow`` works, when each of the filters are supported, and
+differences on stats.
-Note that ``anon`` and ``memcg`` filters are currently supported only when
-``paddr`` :ref:`implementation <sysfs_context>` is being used.
+.. _damon_sysfs_dests:
-Also, memory regions that are filtered out by ``addr`` or ``target`` filters
-are not counted as the scheme has tried to those, while regions that filtered
-out by other type filters are counted as the scheme has tried to. The
-difference is applied to :ref:`stats <damos_stats>` and
-:ref:`tried regions <sysfs_schemes_tried_regions>`.
+schemes/<N>/dests/
+------------------
+
+Directory for specifying the destinations of given DAMON-based operation
+scheme's action. This directory is ignored if the action of the given scheme
+is not supporting multiple destinations. Only ``DAMOS_MIGRATE_{HOT,COLD}``
+actions are supporting multiple destinations.
+
+In the beginning, the directory has only one file, ``nr_dests``. Writing a
+number (``N``) to the file creates the number of child directories named ``0``
+to ``N-1``. Each directory represents each action destination.
+
+Each destination directory contains two files, namely ``id`` and ``weight``.
+Users can write and read the identifier of the destination to ``id`` file.
+For ``DAMOS_MIGRATE_{HOT,COLD}`` actions, the migrate destination node's node
+id should be written to ``id`` file. Users can write and read the weight of
+the destination among the given destinations to the ``weight`` file. The
+weight can be an arbitrary integer. When DAMOS apply the action to each entity
+of the memory region, it will select the destination of the action based on the
+relative weights of the destinations.
.. _sysfs_schemes_stats:
schemes/<N>/stats/
------------------
-DAMON counts the total number and bytes of regions that each scheme is tried to
-be applied, the two numbers for the regions that each scheme is successfully
-applied, and the total number of the quota limit exceeds. This statistics can
-be used for online analysis or tuning of the schemes.
+DAMON counts statistics for each scheme. This statistics can be used for
+online analysis or tuning of the schemes. Refer to :ref:`design doc
+<damon_design_damos_stat>` for more details about the stats.
The statistics can be retrieved by reading the files under ``stats`` directory
-(``nr_tried``, ``sz_tried``, ``nr_applied``, ``sz_applied``, and
-``qt_exceeds``), respectively. The files are not updated in real time, so you
-should ask DAMON sysfs interface to update the content of the files for the
-stats by writing a special keyword, ``update_schemes_stats`` to the relevant
-``kdamonds/<N>/state`` file.
+(``nr_tried``, ``sz_tried``, ``nr_applied``, ``sz_applied``,
+``sz_ops_filter_passed``, and ``qt_exceeds``), respectively. The files are not
+updated in real time, so you should ask DAMON sysfs interface to update the
+content of the files for the stats by writing a special keyword,
+``update_schemes_stats`` to the relevant ``kdamonds/<N>/state`` file.
.. _sysfs_schemes_tried_regions:
@@ -513,10 +583,10 @@ set the ``access pattern`` as their interested pattern that they want to query.
tried_regions/<N>/
------------------
-In each region directory, you will find four files (``start``, ``end``,
-``nr_accesses``, and ``age``). Reading the files will show the start and end
-addresses, ``nr_accesses``, and ``age`` of the region that corresponding
-DAMON-based operation scheme ``action`` has tried to be applied.
+In each region directory, you will find five files (``start``, ``end``,
+``nr_accesses``, ``age``, and ``sz_filter_passed``). Reading the files will
+show the properties of the region that corresponding DAMON-based operation
+scheme ``action`` has tried to be applied.
Example
~~~~~~~
@@ -555,7 +625,7 @@ memory rate becomes larger than 60%, or lower than 30%". ::
# echo 300 > watermarks/low
Please note that it's highly recommended to use user space tools like `damo
-<https://github.com/awslabs/damo>`_ rather than manually reading and writing
+<https://github.com/damonitor/damo>`_ rather than manually reading and writing
the files as above. Above is only for an example.
.. _tracepoint:
@@ -579,11 +649,11 @@ monitoring results recording.
While the monitoring is turned on, you could record the tracepoint events and
show results using tracepoint supporting tools like ``perf``. For example::
- # echo on > monitor_on
+ # echo on > kdamonds/0/state
# perf record -e damon:damon_aggregated &
# sleep 5
# kill 9 $(pidof perf)
- # echo off > monitor_on
+ # echo off > kdamonds/0/state
# perf script
kdamond.0 46568 [027] 79357.842179: damon:damon_aggregated: target_id=0 nr_regions=11 122509119488-135708762112: 0 864
[...]
@@ -612,300 +682,3 @@ fields are as usual. It shows the index of the DAMON context (``ctx_idx=X``)
of the scheme in the list of the contexts of the context's kdamond, the index
of the scheme (``scheme_idx=X``) in the list of the schemes of the context, in
addition to the output of ``damon_aggregated`` tracepoint.
-
-
-.. _debugfs_interface:
-
-debugfs Interface (DEPRECATED!)
-===============================
-
-.. note::
-
- THIS IS DEPRECATED!
-
- DAMON debugfs interface is deprecated, so users should move to the
- :ref:`sysfs interface <sysfs_interface>`. If you depend on this and cannot
- move, please report your usecase to damon@lists.linux.dev and
- linux-mm@kvack.org.
-
-DAMON exports eight files, ``attrs``, ``target_ids``, ``init_regions``,
-``schemes``, ``monitor_on``, ``kdamond_pid``, ``mk_contexts`` and
-``rm_contexts`` under its debugfs directory, ``<debugfs>/damon/``.
-
-
-Attributes
-----------
-
-Users can get and set the ``sampling interval``, ``aggregation interval``,
-``update interval``, and min/max number of monitoring target regions by
-reading from and writing to the ``attrs`` file. To know about the monitoring
-attributes in detail, please refer to the :doc:`/mm/damon/design`. For
-example, below commands set those values to 5 ms, 100 ms, 1,000 ms, 10 and
-1000, and then check it again::
-
- # cd <debugfs>/damon
- # echo 5000 100000 1000000 10 1000 > attrs
- # cat attrs
- 5000 100000 1000000 10 1000
-
-
-Target IDs
-----------
-
-Some types of address spaces supports multiple monitoring target. For example,
-the virtual memory address spaces monitoring can have multiple processes as the
-monitoring targets. Users can set the targets by writing relevant id values of
-the targets to, and get the ids of the current targets by reading from the
-``target_ids`` file. In case of the virtual address spaces monitoring, the
-values should be pids of the monitoring target processes. For example, below
-commands set processes having pids 42 and 4242 as the monitoring targets and
-check it again::
-
- # cd <debugfs>/damon
- # echo 42 4242 > target_ids
- # cat target_ids
- 42 4242
-
-Users can also monitor the physical memory address space of the system by
-writing a special keyword, "``paddr\n``" to the file. Because physical address
-space monitoring doesn't support multiple targets, reading the file will show a
-fake value, ``42``, as below::
-
- # cd <debugfs>/damon
- # echo paddr > target_ids
- # cat target_ids
- 42
-
-Note that setting the target ids doesn't start the monitoring.
-
-
-Initial Monitoring Target Regions
----------------------------------
-
-In case of the virtual address space monitoring, DAMON automatically sets and
-updates the monitoring target regions so that entire memory mappings of target
-processes can be covered. However, users can want to limit the monitoring
-region to specific address ranges, such as the heap, the stack, or specific
-file-mapped area. Or, some users can know the initial access pattern of their
-workloads and therefore want to set optimal initial regions for the 'adaptive
-regions adjustment'.
-
-In contrast, DAMON do not automatically sets and updates the monitoring target
-regions in case of physical memory monitoring. Therefore, users should set the
-monitoring target regions by themselves.
-
-In such cases, users can explicitly set the initial monitoring target regions
-as they want, by writing proper values to the ``init_regions`` file. The input
-should be a sequence of three integers separated by white spaces that represent
-one region in below form.::
-
- <target idx> <start address> <end address>
-
-The ``target idx`` should be the index of the target in ``target_ids`` file,
-starting from ``0``, and the regions should be passed in address order. For
-example, below commands will set a couple of address ranges, ``1-100`` and
-``100-200`` as the initial monitoring target region of pid 42, which is the
-first one (index ``0``) in ``target_ids``, and another couple of address
-ranges, ``20-40`` and ``50-100`` as that of pid 4242, which is the second one
-(index ``1``) in ``target_ids``.::
-
- # cd <debugfs>/damon
- # cat target_ids
- 42 4242
- # echo "0 1 100 \
- 0 100 200 \
- 1 20 40 \
- 1 50 100" > init_regions
-
-Note that this sets the initial monitoring target regions only. In case of
-virtual memory monitoring, DAMON will automatically updates the boundary of the
-regions after one ``update interval``. Therefore, users should set the
-``update interval`` large enough in this case, if they don't want the
-update.
-
-
-Schemes
--------
-
-Users can get and set the DAMON-based operation :ref:`schemes
-<damon_design_damos>` by reading from and writing to ``schemes`` debugfs file.
-Reading the file also shows the statistics of each scheme. To the file, each
-of the schemes should be represented in each line in below form::
-
- <target access pattern> <action> <quota> <watermarks>
-
-You can disable schemes by simply writing an empty string to the file.
-
-Target Access Pattern
-~~~~~~~~~~~~~~~~~~~~~
-
-The target access :ref:`pattern <damon_design_damos_access_pattern>` of the
-scheme. The ``<target access pattern>`` is constructed with three ranges in
-below form::
-
- min-size max-size min-acc max-acc min-age max-age
-
-Specifically, bytes for the size of regions (``min-size`` and ``max-size``),
-number of monitored accesses per aggregate interval for access frequency
-(``min-acc`` and ``max-acc``), number of aggregate intervals for the age of
-regions (``min-age`` and ``max-age``) are specified. Note that the ranges are
-closed interval.
-
-Action
-~~~~~~
-
-The ``<action>`` is a predefined integer for memory management :ref:`actions
-<damon_design_damos_action>`. The supported numbers and their meanings are as
-below.
-
- - 0: Call ``madvise()`` for the region with ``MADV_WILLNEED``. Ignored if
- ``target`` is ``paddr``.
- - 1: Call ``madvise()`` for the region with ``MADV_COLD``. Ignored if
- ``target`` is ``paddr``.
- - 2: Call ``madvise()`` for the region with ``MADV_PAGEOUT``.
- - 3: Call ``madvise()`` for the region with ``MADV_HUGEPAGE``. Ignored if
- ``target`` is ``paddr``.
- - 4: Call ``madvise()`` for the region with ``MADV_NOHUGEPAGE``. Ignored if
- ``target`` is ``paddr``.
- - 5: Do nothing but count the statistics
-
-Quota
-~~~~~
-
-Users can set the :ref:`quotas <damon_design_damos_quotas>` of the given scheme
-via the ``<quota>`` in below form::
-
- <ms> <sz> <reset interval> <priority weights>
-
-This makes DAMON to try to use only up to ``<ms>`` milliseconds for applying
-the action to memory regions of the ``target access pattern`` within the
-``<reset interval>`` milliseconds, and to apply the action to only up to
-``<sz>`` bytes of memory regions within the ``<reset interval>``. Setting both
-``<ms>`` and ``<sz>`` zero disables the quota limits.
-
-For the :ref:`prioritization <damon_design_damos_quotas_prioritization>`, users
-can set the weights for the three properties in ``<priority weights>`` in below
-form::
-
- <size weight> <access frequency weight> <age weight>
-
-Watermarks
-~~~~~~~~~~
-
-Users can specify :ref:`watermarks <damon_design_damos_watermarks>` of the
-given scheme via ``<watermarks>`` in below form::
-
- <metric> <check interval> <high mark> <middle mark> <low mark>
-
-``<metric>`` is a predefined integer for the metric to be checked. The
-supported numbers and their meanings are as below.
-
- - 0: Ignore the watermarks
- - 1: System's free memory rate (per thousand)
-
-The value of the metric is checked every ``<check interval>`` microseconds.
-
-If the value is higher than ``<high mark>`` or lower than ``<low mark>``, the
-scheme is deactivated. If the value is lower than ``<mid mark>``, the scheme
-is activated.
-
-.. _damos_stats:
-
-Statistics
-~~~~~~~~~~
-
-It also counts the total number and bytes of regions that each scheme is tried
-to be applied, the two numbers for the regions that each scheme is successfully
-applied, and the total number of the quota limit exceeds. This statistics can
-be used for online analysis or tuning of the schemes.
-
-The statistics can be shown by reading the ``schemes`` file. Reading the file
-will show each scheme you entered in each line, and the five numbers for the
-statistics will be added at the end of each line.
-
-Example
-~~~~~~~
-
-Below commands applies a scheme saying "If a memory region of size in [4KiB,
-8KiB] is showing accesses per aggregate interval in [0, 5] for aggregate
-interval in [10, 20], page out the region. For the paging out, use only up to
-10ms per second, and also don't page out more than 1GiB per second. Under the
-limitation, page out memory regions having longer age first. Also, check the
-free memory rate of the system every 5 seconds, start the monitoring and paging
-out when the free memory rate becomes lower than 50%, but stop it if the free
-memory rate becomes larger than 60%, or lower than 30%".::
-
- # cd <debugfs>/damon
- # scheme="4096 8192 0 5 10 20 2" # target access pattern and action
- # scheme+=" 10 $((1024*1024*1024)) 1000" # quotas
- # scheme+=" 0 0 100" # prioritization weights
- # scheme+=" 1 5000000 600 500 300" # watermarks
- # echo "$scheme" > schemes
-
-
-Turning On/Off
---------------
-
-Setting the files as described above doesn't incur effect unless you explicitly
-start the monitoring. You can start, stop, and check the current status of the
-monitoring by writing to and reading from the ``monitor_on`` file. Writing
-``on`` to the file starts the monitoring of the targets with the attributes.
-Writing ``off`` to the file stops those. DAMON also stops if every target
-process is terminated. Below example commands turn on, off, and check the
-status of DAMON::
-
- # cd <debugfs>/damon
- # echo on > monitor_on
- # echo off > monitor_on
- # cat monitor_on
- off
-
-Please note that you cannot write to the above-mentioned debugfs files while
-the monitoring is turned on. If you write to the files while DAMON is running,
-an error code such as ``-EBUSY`` will be returned.
-
-
-Monitoring Thread PID
----------------------
-
-DAMON does requested monitoring with a kernel thread called ``kdamond``. You
-can get the pid of the thread by reading the ``kdamond_pid`` file. When the
-monitoring is turned off, reading the file returns ``none``. ::
-
- # cd <debugfs>/damon
- # cat monitor_on
- off
- # cat kdamond_pid
- none
- # echo on > monitor_on
- # cat kdamond_pid
- 18594
-
-
-Using Multiple Monitoring Threads
----------------------------------
-
-One ``kdamond`` thread is created for each monitoring context. You can create
-and remove monitoring contexts for multiple ``kdamond`` required use case using
-the ``mk_contexts`` and ``rm_contexts`` files.
-
-Writing the name of the new context to the ``mk_contexts`` file creates a
-directory of the name on the DAMON debugfs directory. The directory will have
-DAMON debugfs files for the context. ::
-
- # cd <debugfs>/damon
- # ls foo
- # ls: cannot access 'foo': No such file or directory
- # echo foo > mk_contexts
- # ls foo
- # attrs init_regions kdamond_pid schemes target_ids
-
-If the context is not needed anymore, you can remove it and the corresponding
-directory by putting the name of the context to the ``rm_contexts`` file. ::
-
- # echo foo > rm_contexts
- # ls foo
- # ls: cannot access 'foo': No such file or directory
-
-Note that ``mk_contexts``, ``rm_contexts``, and ``monitor_on`` files are in the
-root directory only.
diff --git a/Documentation/admin-guide/mm/hugetlbpage.rst b/Documentation/admin-guide/mm/hugetlbpage.rst
index e4d4b4a8dc97..67a941903fd2 100644
--- a/Documentation/admin-guide/mm/hugetlbpage.rst
+++ b/Documentation/admin-guide/mm/hugetlbpage.rst
@@ -145,7 +145,17 @@ hugepages
It will allocate 1 2M hugepage on node0 and 2 2M hugepages on node1.
If the node number is invalid, the parameter will be ignored.
+hugepage_alloc_threads
+ Specify the number of threads that should be used to allocate hugepages
+ during boot. This parameter can be used to improve system bootup time
+ when allocating a large amount of huge pages.
+ The default value is 25% of the available hardware threads.
+ Example to use 8 allocation threads::
+
+ hugepage_alloc_threads=8
+
+ Note that this parameter only applies to non-gigantic huge pages.
default_hugepagesz
Specify the default huge page size. This parameter can
only be specified once on the command line. default_hugepagesz can
@@ -376,6 +386,13 @@ Note that the number of overcommit and reserve pages remain global quantities,
as we don't know until fault time, when the faulting task's mempolicy is
applied, from which node the huge page allocation will be attempted.
+The hugetlb may be migrated between the per-node hugepages pool in the following
+scenarios: memory offline, memory failure, longterm pinning, syscalls(mbind,
+migrate_pages and move_pages), alloc_contig_range() and alloc_contig_pages().
+Now only memory offline, memory failure and syscalls allow fallbacking to allocate
+a new hugetlb on a different node if the current node is unable to allocate during
+hugetlb migration, that means these 3 cases can break the per-node hugepages pool.
+
.. _using_huge_pages:
Using Huge Pages
diff --git a/Documentation/admin-guide/mm/index.rst b/Documentation/admin-guide/mm/index.rst
index 1f883abf3f00..bbb563cba5d2 100644
--- a/Documentation/admin-guide/mm/index.rst
+++ b/Documentation/admin-guide/mm/index.rst
@@ -10,7 +10,7 @@ processes address space and many other cool things.
Linux memory management is a complex system with many configurable
settings. Most of these settings are available via ``/proc``
-filesystem and can be quired and adjusted using ``sysctl``. These APIs
+filesystem and can be queried and adjusted using ``sysctl``. These APIs
are described in Documentation/admin-guide/sysctl/vm.rst and in `man 5 proc`_.
.. _man 5 proc: http://man7.org/linux/man-pages/man5/proc.5.html
@@ -37,8 +37,9 @@ the Linux memory management.
numaperf
pagemap
shrinker_debugfs
+ slab
soft-dirty
- swap_numa
transhuge
userfaultfd
zswap
+ kho
diff --git a/Documentation/admin-guide/mm/kho.rst b/Documentation/admin-guide/mm/kho.rst
new file mode 100644
index 000000000000..6dc18ed4b886
--- /dev/null
+++ b/Documentation/admin-guide/mm/kho.rst
@@ -0,0 +1,115 @@
+.. SPDX-License-Identifier: GPL-2.0-or-later
+
+====================
+Kexec Handover Usage
+====================
+
+Kexec HandOver (KHO) is a mechanism that allows Linux to preserve memory
+regions, which could contain serialized system states, across kexec.
+
+This document expects that you are familiar with the base KHO
+:ref:`concepts <kho-concepts>`. If you have not read
+them yet, please do so now.
+
+Prerequisites
+=============
+
+KHO is available when the kernel is compiled with ``CONFIG_KEXEC_HANDOVER``
+set to y. Every KHO producer may have its own config option that you
+need to enable if you would like to preserve their respective state across
+kexec.
+
+To use KHO, please boot the kernel with the ``kho=on`` command line
+parameter. You may use ``kho_scratch`` parameter to define size of the
+scratch regions. For example ``kho_scratch=16M,512M,256M`` will reserve a
+16 MiB low memory scratch area, a 512 MiB global scratch region, and 256 MiB
+per NUMA node scratch regions on boot.
+
+Perform a KHO kexec
+===================
+
+First, before you perform a KHO kexec, you need to move the system into
+the :ref:`KHO finalization phase <kho-finalization-phase>` ::
+
+ $ echo 1 > /sys/kernel/debug/kho/out/finalize
+
+After this command, the KHO FDT is available in
+``/sys/kernel/debug/kho/out/fdt``. Other subsystems may also register
+their own preserved sub FDTs under
+``/sys/kernel/debug/kho/out/sub_fdts/``.
+
+Next, load the target payload and kexec into it. It is important that you
+use the ``-s`` parameter to use the in-kernel kexec file loader, as user
+space kexec tooling currently has no support for KHO with the user space
+based file loader ::
+
+ # kexec -l /path/to/bzImage --initrd /path/to/initrd -s
+ # kexec -e
+
+The new kernel will boot up and contain some of the previous kernel's state.
+
+For example, if you used ``reserve_mem`` command line parameter to create
+an early memory reservation, the new kernel will have that memory at the
+same physical address as the old kernel.
+
+Abort a KHO exec
+================
+
+You can move the system out of KHO finalization phase again by calling ::
+
+ $ echo 0 > /sys/kernel/debug/kho/out/active
+
+After this command, the KHO FDT is no longer available in
+``/sys/kernel/debug/kho/out/fdt``.
+
+debugfs Interfaces
+==================
+
+Currently KHO creates the following debugfs interfaces. Notice that these
+interfaces may change in the future. They will be moved to sysfs once KHO is
+stabilized.
+
+``/sys/kernel/debug/kho/out/finalize``
+ Kexec HandOver (KHO) allows Linux to transition the state of
+ compatible drivers into the next kexec'ed kernel. To do so,
+ device drivers will instruct KHO to preserve memory regions,
+ which could contain serialized kernel state.
+ While the state is serialized, they are unable to perform
+ any modifications to state that was serialized, such as
+ handed over memory allocations.
+
+ When this file contains "1", the system is in the transition
+ state. When contains "0", it is not. To switch between the
+ two states, echo the respective number into this file.
+
+``/sys/kernel/debug/kho/out/fdt``
+ When KHO state tree is finalized, the kernel exposes the
+ flattened device tree blob that carries its current KHO
+ state in this file. Kexec user space tooling can use this
+ as input file for the KHO payload image.
+
+``/sys/kernel/debug/kho/out/scratch_len``
+ Lengths of KHO scratch regions, which are physically contiguous
+ memory regions that will always stay available for future kexec
+ allocations. Kexec user space tools can use this file to determine
+ where it should place its payload images.
+
+``/sys/kernel/debug/kho/out/scratch_phys``
+ Physical locations of KHO scratch regions. Kexec user space tools
+ can use this file in conjunction to scratch_phys to determine where
+ it should place its payload images.
+
+``/sys/kernel/debug/kho/out/sub_fdts/``
+ In the KHO finalization phase, KHO producers register their own
+ FDT blob under this directory.
+
+``/sys/kernel/debug/kho/in/fdt``
+ When the kernel was booted with Kexec HandOver (KHO),
+ the state tree that carries metadata about the previous
+ kernel's state is in this file in the format of flattened
+ device tree. This file may disappear when all consumers of
+ it finished to interpret their metadata.
+
+``/sys/kernel/debug/kho/in/sub_fdts/``
+ Similar to ``kho/out/sub_fdts/``, but contains sub FDT blobs
+ of KHO producers passed from the old kernel.
diff --git a/Documentation/admin-guide/mm/ksm.rst b/Documentation/admin-guide/mm/ksm.rst
index a639cac12477..ad8e7a41f3b5 100644
--- a/Documentation/admin-guide/mm/ksm.rst
+++ b/Documentation/admin-guide/mm/ksm.rst
@@ -308,7 +308,7 @@ limited by the ``advisor_max_cpu`` parameter. In addition there is also the
``advisor_target_scan_time`` parameter. This parameter sets the target time to
scan all the KSM candidate pages. The parameter ``advisor_target_scan_time``
decides how aggressive the scan time advisor scans candidate pages. Lower
-values make the scan time advisor to scan more aggresively. This is the most
+values make the scan time advisor to scan more aggressively. This is the most
important parameter for the configuration of the scan time advisor.
The initial value and the maximum value can be changed with
diff --git a/Documentation/admin-guide/mm/memory-hotplug.rst b/Documentation/admin-guide/mm/memory-hotplug.rst
index 098f14d83e99..33c886f3d198 100644
--- a/Documentation/admin-guide/mm/memory-hotplug.rst
+++ b/Documentation/admin-guide/mm/memory-hotplug.rst
@@ -280,8 +280,8 @@ The following files are currently defined:
blocks; configure auto-onlining.
The default value depends on the
- CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE kernel configuration
- option.
+ CONFIG_MHP_DEFAULT_ONLINE_TYPE kernel configuration
+ options.
See the ``state`` property of memory blocks for details.
``block_size_bytes`` read-only: the size in bytes of a memory block.
@@ -294,8 +294,9 @@ The following files are currently defined:
``crash_hotplug`` read-only: when changes to the system memory map
occur due to hot un/plug of memory, this file contains
'1' if the kernel updates the kdump capture kernel memory
- map itself (via elfcorehdr), or '0' if userspace must update
- the kdump capture kernel memory map.
+ map itself (via elfcorehdr and other relevant kexec
+ segments), or '0' if userspace must update the kdump
+ capture kernel memory map.
Availability depends on the CONFIG_MEMORY_HOTPLUG kernel
configuration option.
diff --git a/Documentation/admin-guide/mm/multigen_lru.rst b/Documentation/admin-guide/mm/multigen_lru.rst
index 33e068830497..9cb54b4ff5d9 100644
--- a/Documentation/admin-guide/mm/multigen_lru.rst
+++ b/Documentation/admin-guide/mm/multigen_lru.rst
@@ -151,8 +151,9 @@ generations less than or equal to ``min_gen_nr``.
``min_gen_nr`` should be less than ``max_gen_nr-1``, since
``max_gen_nr`` and ``max_gen_nr-1`` are not fully aged (equivalent to
the active list) and therefore cannot be evicted. ``swappiness``
-overrides the default value in ``/proc/sys/vm/swappiness``.
-``nr_to_reclaim`` limits the number of pages to evict.
+overrides the default value in ``/proc/sys/vm/swappiness`` and the valid
+range is [0-200, max], with max being exclusively used for the reclamation
+of anonymous memory. ``nr_to_reclaim`` limits the number of pages to evict.
A typical use case is that a job scheduler runs this command before it
tries to land a new job on a server. If it fails to materialize enough
diff --git a/Documentation/admin-guide/mm/numa_memory_policy.rst b/Documentation/admin-guide/mm/numa_memory_policy.rst
index eca38fa81e0f..a70f20ce1ffb 100644
--- a/Documentation/admin-guide/mm/numa_memory_policy.rst
+++ b/Documentation/admin-guide/mm/numa_memory_policy.rst
@@ -250,6 +250,15 @@ MPOL_PREFERRED_MANY
can fall back to all existing numa nodes. This is effectively
MPOL_PREFERRED allowed for a mask rather than a single node.
+MPOL_WEIGHTED_INTERLEAVE
+ This mode operates the same as MPOL_INTERLEAVE, except that
+ interleaving behavior is executed based on weights set in
+ /sys/kernel/mm/mempolicy/weighted_interleave/
+
+ Weighted interleave allocates pages on nodes according to a
+ weight. For example if nodes [0,1] are weighted [5,2], 5 pages
+ will be allocated on node0 for every 2 pages allocated on node1.
+
NUMA memory policy supports the following optional mode flags:
MPOL_F_STATIC_NODES
diff --git a/Documentation/admin-guide/mm/pagemap.rst b/Documentation/admin-guide/mm/pagemap.rst
index f5f065c67615..c57e61b5d8aa 100644
--- a/Documentation/admin-guide/mm/pagemap.rst
+++ b/Documentation/admin-guide/mm/pagemap.rst
@@ -21,7 +21,8 @@ There are four components to pagemap:
* Bit 56 page exclusively mapped (since 4.2)
* Bit 57 pte is uffd-wp write-protected (since 5.13) (see
Documentation/admin-guide/mm/userfaultfd.rst)
- * Bits 58-60 zero
+ * Bit 58 pte is a guard region (since 6.15) (see madvise (2) man page)
+ * Bits 59-60 zero
* Bit 61 page is file-page or shared-anon (since 3.5)
* Bit 62 page swapped
* Bit 63 page present
@@ -37,12 +38,28 @@ There are four components to pagemap:
precisely which pages are mapped (or in swap) and comparing mapped
pages between processes.
+ Traditionally, bit 56 indicates that a page is mapped exactly once and bit
+ 56 is clear when a page is mapped multiple times, even when mapped in the
+ same process multiple times. In some kernel configurations, the semantics
+ for pages part of a larger allocation (e.g., THP) can differ: bit 56 is set
+ if all pages part of the corresponding large allocation are *certainly*
+ mapped in the same process, even if the page is mapped multiple times in that
+ process. Bit 56 is clear when any page page of the larger allocation
+ is *maybe* mapped in a different process. In some cases, a large allocation
+ might be treated as "maybe mapped by multiple processes" even though this
+ is no longer the case.
+
Efficient users of this interface will use ``/proc/pid/maps`` to
determine which areas of memory are actually mapped and llseek to
skip over unmapped regions.
* ``/proc/kpagecount``. This file contains a 64-bit count of the number of
- times each page is mapped, indexed by PFN.
+ times each page is mapped, indexed by PFN. Some kernel configurations do
+ not track the precise number of times a page part of a larger allocation
+ (e.g., THP) is mapped. In these configurations, the average number of
+ mappings per page in this larger allocation is returned instead. However,
+ if any page of the large allocation is mapped, the returned value will
+ be at least 1.
The page-types tool in the tools/mm directory can be used to query the
number of times a page is mapped.
@@ -98,7 +115,8 @@ Short descriptions to the page flags
A free memory block managed by the buddy system allocator.
The buddy system organizes free memory in blocks of various orders.
An order N block has 2^N physically contiguous pages, with the BUDDY flag
- set for and _only_ for the first page.
+ set for all pages.
+ Before 4.6 only the first page of the block had the flag set.
15 - COMPOUND_HEAD
A compound page with order N consists of 2^N physically contiguous pages.
A compound page with order 2 takes the form of "HTTT", where H donates its
@@ -118,7 +136,7 @@ Short descriptions to the page flags
21 - KSM
Identical memory pages dynamically shared between one or more processes.
22 - THP
- Contiguous pages which construct transparent hugepages.
+ Contiguous pages which construct THP of any size and mapped by any granularity.
23 - OFFLINE
The page is logically offline.
24 - ZERO_PAGE
@@ -173,27 +191,6 @@ LRU related page flags
The page-types tool in the tools/mm directory can be used to query the
above flags.
-Using pagemap to do something useful
-====================================
-
-The general procedure for using pagemap to find out about a process' memory
-usage goes like this:
-
- 1. Read ``/proc/pid/maps`` to determine which parts of the memory space are
- mapped to what.
- 2. Select the maps you are interested in -- all of them, or a particular
- library, or the stack or the heap, etc.
- 3. Open ``/proc/pid/pagemap`` and seek to the pages you would like to examine.
- 4. Read a u64 for each page from pagemap.
- 5. Open ``/proc/kpagecount`` and/or ``/proc/kpageflags``. For each PFN you
- just read, seek to that entry in the file, and read the data you want.
-
-For example, to find the "unique set size" (USS), which is the amount of
-memory that a process is using that is not shared with any other process,
-you can go through every map in the process, find the PFNs, look those up
-in kpagecount, and tally up the number of pages that are only referenced
-once.
-
Exceptions for Shared Memory
============================
@@ -252,8 +249,9 @@ Following flags about pages are currently supported:
- ``PAGE_IS_PRESENT`` - Page is present in the memory
- ``PAGE_IS_SWAPPED`` - Page is in swapped
- ``PAGE_IS_PFNZERO`` - Page has zero PFN
-- ``PAGE_IS_HUGE`` - Page is THP or Hugetlb backed
+- ``PAGE_IS_HUGE`` - Page is PMD-mapped THP or Hugetlb backed
- ``PAGE_IS_SOFT_DIRTY`` - Page is soft-dirty
+- ``PAGE_IS_GUARD`` - Page is a part of a guard region
The ``struct pm_scan_arg`` is used as the argument of the IOCTL.
diff --git a/Documentation/admin-guide/mm/slab.rst b/Documentation/admin-guide/mm/slab.rst
new file mode 100644
index 000000000000..14429ab90611
--- /dev/null
+++ b/Documentation/admin-guide/mm/slab.rst
@@ -0,0 +1,469 @@
+========================================
+Short users guide for the slab allocator
+========================================
+
+The slab allocator includes full debugging support (when built with
+CONFIG_SLUB_DEBUG=y) but it is off by default (unless built with
+CONFIG_SLUB_DEBUG_ON=y). You can enable debugging only for selected
+slabs in order to avoid an impact on overall system performance which
+may make a bug more difficult to find.
+
+In order to switch debugging on one can add an option ``slab_debug``
+to the kernel command line. That will enable full debugging for
+all slabs.
+
+Typically one would then use the ``slabinfo`` command to get statistical
+data and perform operation on the slabs. By default ``slabinfo`` only lists
+slabs that have data in them. See "slabinfo -h" for more options when
+running the command. ``slabinfo`` can be compiled with
+::
+
+ gcc -o slabinfo tools/mm/slabinfo.c
+
+Some of the modes of operation of ``slabinfo`` require that slub debugging
+be enabled on the command line. F.e. no tracking information will be
+available without debugging on and validation can only partially
+be performed if debugging was not switched on.
+
+Some more sophisticated uses of slab_debug:
+-------------------------------------------
+
+Parameters may be given to ``slab_debug``. If none is specified then full
+debugging is enabled. Format:
+
+slab_debug=<Debug-Options>
+ Enable options for all slabs
+
+slab_debug=<Debug-Options>,<slab name1>,<slab name2>,...
+ Enable options only for select slabs (no spaces
+ after a comma)
+
+Multiple blocks of options for all slabs or selected slabs can be given, with
+blocks of options delimited by ';'. The last of "all slabs" blocks is applied
+to all slabs except those that match one of the "select slabs" block. Options
+of the first "select slabs" blocks that matches the slab's name are applied.
+
+Possible debug options are::
+
+ F Sanity checks on (enables SLAB_DEBUG_CONSISTENCY_CHECKS
+ Sorry SLAB legacy issues)
+ Z Red zoning
+ P Poisoning (object and padding)
+ U User tracking (free and alloc)
+ T Trace (please only use on single slabs)
+ A Enable failslab filter mark for the cache
+ O Switch debugging off for caches that would have
+ caused higher minimum slab orders
+ - Switch all debugging off (useful if the kernel is
+ configured with CONFIG_SLUB_DEBUG_ON)
+
+F.e. in order to boot just with sanity checks and red zoning one would specify::
+
+ slab_debug=FZ
+
+Trying to find an issue in the dentry cache? Try::
+
+ slab_debug=,dentry
+
+to only enable debugging on the dentry cache. You may use an asterisk at the
+end of the slab name, in order to cover all slabs with the same prefix. For
+example, here's how you can poison the dentry cache as well as all kmalloc
+slabs::
+
+ slab_debug=P,kmalloc-*,dentry
+
+Red zoning and tracking may realign the slab. We can just apply sanity checks
+to the dentry cache with::
+
+ slab_debug=F,dentry
+
+Debugging options may require the minimum possible slab order to increase as
+a result of storing the metadata (for example, caches with PAGE_SIZE object
+sizes). This has a higher likelihood of resulting in slab allocation errors
+in low memory situations or if there's high fragmentation of memory. To
+switch off debugging for such caches by default, use::
+
+ slab_debug=O
+
+You can apply different options to different list of slab names, using blocks
+of options. This will enable red zoning for dentry and user tracking for
+kmalloc. All other slabs will not get any debugging enabled::
+
+ slab_debug=Z,dentry;U,kmalloc-*
+
+You can also enable options (e.g. sanity checks and poisoning) for all caches
+except some that are deemed too performance critical and don't need to be
+debugged by specifying global debug options followed by a list of slab names
+with "-" as options::
+
+ slab_debug=FZ;-,zs_handle,zspage
+
+The state of each debug option for a slab can be found in the respective files
+under::
+
+ /sys/kernel/slab/<slab name>/
+
+If the file contains 1, the option is enabled, 0 means disabled. The debug
+options from the ``slab_debug`` parameter translate to the following files::
+
+ F sanity_checks
+ Z red_zone
+ P poison
+ U store_user
+ T trace
+ A failslab
+
+failslab file is writable, so writing 1 or 0 will enable or disable
+the option at runtime. Write returns -EINVAL if cache is an alias.
+Careful with tracing: It may spew out lots of information and never stop if
+used on the wrong slab.
+
+Slab merging
+============
+
+If no debug options are specified then SLUB may merge similar slabs together
+in order to reduce overhead and increase cache hotness of objects.
+``slabinfo -a`` displays which slabs were merged together.
+
+Slab validation
+===============
+
+SLUB can validate all object if the kernel was booted with slab_debug. In
+order to do so you must have the ``slabinfo`` tool. Then you can do
+::
+
+ slabinfo -v
+
+which will test all objects. Output will be generated to the syslog.
+
+This also works in a more limited way if boot was without slab debug.
+In that case ``slabinfo -v`` simply tests all reachable objects. Usually
+these are in the cpu slabs and the partial slabs. Full slabs are not
+tracked by SLUB in a non debug situation.
+
+Getting more performance
+========================
+
+To some degree SLUB's performance is limited by the need to take the
+list_lock once in a while to deal with partial slabs. That overhead is
+governed by the order of the allocation for each slab. The allocations
+can be influenced by kernel parameters:
+
+.. slab_min_objects=x (default: automatically scaled by number of cpus)
+.. slab_min_order=x (default 0)
+.. slab_max_order=x (default 3 (PAGE_ALLOC_COSTLY_ORDER))
+
+``slab_min_objects``
+ allows to specify how many objects must at least fit into one
+ slab in order for the allocation order to be acceptable. In
+ general slub will be able to perform this number of
+ allocations on a slab without consulting centralized resources
+ (list_lock) where contention may occur.
+
+``slab_min_order``
+ specifies a minimum order of slabs. A similar effect like
+ ``slab_min_objects``.
+
+``slab_max_order``
+ specified the order at which ``slab_min_objects`` should no
+ longer be checked. This is useful to avoid SLUB trying to
+ generate super large order pages to fit ``slab_min_objects``
+ of a slab cache with large object sizes into one high order
+ page. Setting command line parameter
+ ``debug_guardpage_minorder=N`` (N > 0), forces setting
+ ``slab_max_order`` to 0, what cause minimum possible order of
+ slabs allocation.
+
+``slab_strict_numa``
+ Enables the application of memory policies on each
+ allocation. This results in more accurate placement of
+ objects which may result in the reduction of accesses
+ to remote nodes. The default is to only apply memory
+ policies at the folio level when a new folio is acquired
+ or a folio is retrieved from the lists. Enabling this
+ option reduces the fastpath performance of the slab allocator.
+
+SLUB Debug output
+=================
+
+Here is a sample of slub debug output::
+
+ ====================================================================
+ BUG kmalloc-8: Right Redzone overwritten
+ --------------------------------------------------------------------
+
+ INFO: 0xc90f6d28-0xc90f6d2b. First byte 0x00 instead of 0xcc
+ INFO: Slab 0xc528c530 flags=0x400000c3 inuse=61 fp=0xc90f6d58
+ INFO: Object 0xc90f6d20 @offset=3360 fp=0xc90f6d58
+ INFO: Allocated in get_modalias+0x61/0xf5 age=53 cpu=1 pid=554
+
+ Bytes b4 (0xc90f6d10): 00 00 00 00 00 00 00 00 5a 5a 5a 5a 5a 5a 5a 5a ........ZZZZZZZZ
+ Object (0xc90f6d20): 31 30 31 39 2e 30 30 35 1019.005
+ Redzone (0xc90f6d28): 00 cc cc cc .
+ Padding (0xc90f6d50): 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZ
+
+ [<c010523d>] dump_trace+0x63/0x1eb
+ [<c01053df>] show_trace_log_lvl+0x1a/0x2f
+ [<c010601d>] show_trace+0x12/0x14
+ [<c0106035>] dump_stack+0x16/0x18
+ [<c017e0fa>] object_err+0x143/0x14b
+ [<c017e2cc>] check_object+0x66/0x234
+ [<c017eb43>] __slab_free+0x239/0x384
+ [<c017f446>] kfree+0xa6/0xc6
+ [<c02e2335>] get_modalias+0xb9/0xf5
+ [<c02e23b7>] dmi_dev_uevent+0x27/0x3c
+ [<c027866a>] dev_uevent+0x1ad/0x1da
+ [<c0205024>] kobject_uevent_env+0x20a/0x45b
+ [<c020527f>] kobject_uevent+0xa/0xf
+ [<c02779f1>] store_uevent+0x4f/0x58
+ [<c027758e>] dev_attr_store+0x29/0x2f
+ [<c01bec4f>] sysfs_write_file+0x16e/0x19c
+ [<c0183ba7>] vfs_write+0xd1/0x15a
+ [<c01841d7>] sys_write+0x3d/0x72
+ [<c0104112>] sysenter_past_esp+0x5f/0x99
+ [<b7f7b410>] 0xb7f7b410
+ =======================
+
+ FIX kmalloc-8: Restoring Redzone 0xc90f6d28-0xc90f6d2b=0xcc
+
+If SLUB encounters a corrupted object (full detection requires the kernel
+to be booted with slab_debug) then the following output will be dumped
+into the syslog:
+
+1. Description of the problem encountered
+
+ This will be a message in the system log starting with::
+
+ ===============================================
+ BUG <slab cache affected>: <What went wrong>
+ -----------------------------------------------
+
+ INFO: <corruption start>-<corruption_end> <more info>
+ INFO: Slab <address> <slab information>
+ INFO: Object <address> <object information>
+ INFO: Allocated in <kernel function> age=<jiffies since alloc> cpu=<allocated by
+ cpu> pid=<pid of the process>
+ INFO: Freed in <kernel function> age=<jiffies since free> cpu=<freed by cpu>
+ pid=<pid of the process>
+
+ (Object allocation / free information is only available if SLAB_STORE_USER is
+ set for the slab. slab_debug sets that option)
+
+2. The object contents if an object was involved.
+
+ Various types of lines can follow the BUG SLUB line:
+
+ Bytes b4 <address> : <bytes>
+ Shows a few bytes before the object where the problem was detected.
+ Can be useful if the corruption does not stop with the start of the
+ object.
+
+ Object <address> : <bytes>
+ The bytes of the object. If the object is inactive then the bytes
+ typically contain poison values. Any non-poison value shows a
+ corruption by a write after free.
+
+ Redzone <address> : <bytes>
+ The Redzone following the object. The Redzone is used to detect
+ writes after the object. All bytes should always have the same
+ value. If there is any deviation then it is due to a write after
+ the object boundary.
+
+ (Redzone information is only available if SLAB_RED_ZONE is set.
+ slab_debug sets that option)
+
+ Padding <address> : <bytes>
+ Unused data to fill up the space in order to get the next object
+ properly aligned. In the debug case we make sure that there are
+ at least 4 bytes of padding. This allows the detection of writes
+ before the object.
+
+3. A stackdump
+
+ The stackdump describes the location where the error was detected. The cause
+ of the corruption is may be more likely found by looking at the function that
+ allocated or freed the object.
+
+4. Report on how the problem was dealt with in order to ensure the continued
+ operation of the system.
+
+ These are messages in the system log beginning with::
+
+ FIX <slab cache affected>: <corrective action taken>
+
+ In the above sample SLUB found that the Redzone of an active object has
+ been overwritten. Here a string of 8 characters was written into a slab that
+ has the length of 8 characters. However, a 8 character string needs a
+ terminating 0. That zero has overwritten the first byte of the Redzone field.
+ After reporting the details of the issue encountered the FIX SLUB message
+ tells us that SLUB has restored the Redzone to its proper value and then
+ system operations continue.
+
+Emergency operations
+====================
+
+Minimal debugging (sanity checks alone) can be enabled by booting with::
+
+ slab_debug=F
+
+This will be generally be enough to enable the resiliency features of slub
+which will keep the system running even if a bad kernel component will
+keep corrupting objects. This may be important for production systems.
+Performance will be impacted by the sanity checks and there will be a
+continual stream of error messages to the syslog but no additional memory
+will be used (unlike full debugging).
+
+No guarantees. The kernel component still needs to be fixed. Performance
+may be optimized further by locating the slab that experiences corruption
+and enabling debugging only for that cache
+
+I.e.::
+
+ slab_debug=F,dentry
+
+If the corruption occurs by writing after the end of the object then it
+may be advisable to enable a Redzone to avoid corrupting the beginning
+of other objects::
+
+ slab_debug=FZ,dentry
+
+Extended slabinfo mode and plotting
+===================================
+
+The ``slabinfo`` tool has a special 'extended' ('-X') mode that includes:
+ - Slabcache Totals
+ - Slabs sorted by size (up to -N <num> slabs, default 1)
+ - Slabs sorted by loss (up to -N <num> slabs, default 1)
+
+Additionally, in this mode ``slabinfo`` does not dynamically scale
+sizes (G/M/K) and reports everything in bytes (this functionality is
+also available to other slabinfo modes via '-B' option) which makes
+reporting more precise and accurate. Moreover, in some sense the `-X'
+mode also simplifies the analysis of slabs' behaviour, because its
+output can be plotted using the ``slabinfo-gnuplot.sh`` script. So it
+pushes the analysis from looking through the numbers (tons of numbers)
+to something easier -- visual analysis.
+
+To generate plots:
+
+a) collect slabinfo extended records, for example::
+
+ while [ 1 ]; do slabinfo -X >> FOO_STATS; sleep 1; done
+
+b) pass stats file(-s) to ``slabinfo-gnuplot.sh`` script::
+
+ slabinfo-gnuplot.sh FOO_STATS [FOO_STATS2 .. FOO_STATSN]
+
+ The ``slabinfo-gnuplot.sh`` script will pre-processes the collected records
+ and generates 3 png files (and 3 pre-processing cache files) per STATS
+ file:
+ - Slabcache Totals: FOO_STATS-totals.png
+ - Slabs sorted by size: FOO_STATS-slabs-by-size.png
+ - Slabs sorted by loss: FOO_STATS-slabs-by-loss.png
+
+Another use case, when ``slabinfo-gnuplot.sh`` can be useful, is when you
+need to compare slabs' behaviour "prior to" and "after" some code
+modification. To help you out there, ``slabinfo-gnuplot.sh`` script
+can 'merge' the `Slabcache Totals` sections from different
+measurements. To visually compare N plots:
+
+a) Collect as many STATS1, STATS2, .. STATSN files as you need::
+
+ while [ 1 ]; do slabinfo -X >> STATS<X>; sleep 1; done
+
+b) Pre-process those STATS files::
+
+ slabinfo-gnuplot.sh STATS1 STATS2 .. STATSN
+
+c) Execute ``slabinfo-gnuplot.sh`` in '-t' mode, passing all of the
+ generated pre-processed \*-totals::
+
+ slabinfo-gnuplot.sh -t STATS1-totals STATS2-totals .. STATSN-totals
+
+ This will produce a single plot (png file).
+
+ Plots, expectedly, can be large so some fluctuations or small spikes
+ can go unnoticed. To deal with that, ``slabinfo-gnuplot.sh`` has two
+ options to 'zoom-in'/'zoom-out':
+
+ a) ``-s %d,%d`` -- overwrites the default image width and height
+ b) ``-r %d,%d`` -- specifies a range of samples to use (for example,
+ in ``slabinfo -X >> FOO_STATS; sleep 1;`` case, using a ``-r
+ 40,60`` range will plot only samples collected between 40th and
+ 60th seconds).
+
+
+DebugFS files for SLUB
+======================
+
+For more information about current state of SLUB caches with the user tracking
+debug option enabled, debugfs files are available, typically under
+/sys/kernel/debug/slab/<cache>/ (created only for caches with enabled user
+tracking). There are 2 types of these files with the following debug
+information:
+
+1. alloc_traces::
+
+ Prints information about unique allocation traces of the currently
+ allocated objects. The output is sorted by frequency of each trace.
+
+ Information in the output:
+ Number of objects, allocating function, possible memory wastage of
+ kmalloc objects(total/per-object), minimal/average/maximal jiffies
+ since alloc, pid range of the allocating processes, cpu mask of
+ allocating cpus, numa node mask of origins of memory, and stack trace.
+
+ Example:::
+
+ 338 pci_alloc_dev+0x2c/0xa0 waste=521872/1544 age=290837/291891/293509 pid=1 cpus=106 nodes=0-1
+ __kmem_cache_alloc_node+0x11f/0x4e0
+ kmalloc_trace+0x26/0xa0
+ pci_alloc_dev+0x2c/0xa0
+ pci_scan_single_device+0xd2/0x150
+ pci_scan_slot+0xf7/0x2d0
+ pci_scan_child_bus_extend+0x4e/0x360
+ acpi_pci_root_create+0x32e/0x3b0
+ pci_acpi_scan_root+0x2b9/0x2d0
+ acpi_pci_root_add.cold.11+0x110/0xb0a
+ acpi_bus_attach+0x262/0x3f0
+ device_for_each_child+0xb7/0x110
+ acpi_dev_for_each_child+0x77/0xa0
+ acpi_bus_attach+0x108/0x3f0
+ device_for_each_child+0xb7/0x110
+ acpi_dev_for_each_child+0x77/0xa0
+ acpi_bus_attach+0x108/0x3f0
+
+2. free_traces::
+
+ Prints information about unique freeing traces of the currently allocated
+ objects. The freeing traces thus come from the previous life-cycle of the
+ objects and are reported as not available for objects allocated for the first
+ time. The output is sorted by frequency of each trace.
+
+ Information in the output:
+ Number of objects, freeing function, minimal/average/maximal jiffies since free,
+ pid range of the freeing processes, cpu mask of freeing cpus, and stack trace.
+
+ Example:::
+
+ 1980 <not-available> age=4294912290 pid=0 cpus=0
+ 51 acpi_ut_update_ref_count+0x6a6/0x782 age=236886/237027/237772 pid=1 cpus=1
+ kfree+0x2db/0x420
+ acpi_ut_update_ref_count+0x6a6/0x782
+ acpi_ut_update_object_reference+0x1ad/0x234
+ acpi_ut_remove_reference+0x7d/0x84
+ acpi_rs_get_prt_method_data+0x97/0xd6
+ acpi_get_irq_routing_table+0x82/0xc4
+ acpi_pci_irq_find_prt_entry+0x8e/0x2e0
+ acpi_pci_irq_lookup+0x3a/0x1e0
+ acpi_pci_irq_enable+0x77/0x240
+ pcibios_enable_device+0x39/0x40
+ do_pci_enable_device.part.0+0x5d/0xe0
+ pci_enable_device_flags+0xfc/0x120
+ pci_enable_device+0x13/0x20
+ virtio_pci_probe+0x9e/0x170
+ local_pci_probe+0x48/0x80
+ pci_device_probe+0x105/0x1c0
+
+Christoph Lameter, May 30, 2007
+Sergey Senozhatsky, October 23, 2015
diff --git a/Documentation/admin-guide/mm/swap_numa.rst b/Documentation/admin-guide/mm/swap_numa.rst
deleted file mode 100644
index 2e630627bcee..000000000000
--- a/Documentation/admin-guide/mm/swap_numa.rst
+++ /dev/null
@@ -1,78 +0,0 @@
-===========================================
-Automatically bind swap device to numa node
-===========================================
-
-If the system has more than one swap device and swap device has the node
-information, we can make use of this information to decide which swap
-device to use in get_swap_pages() to get better performance.
-
-
-How to use this feature
-=======================
-
-Swap device has priority and that decides the order of it to be used. To make
-use of automatically binding, there is no need to manipulate priority settings
-for swap devices. e.g. on a 2 node machine, assume 2 swap devices swapA and
-swapB, with swapA attached to node 0 and swapB attached to node 1, are going
-to be swapped on. Simply swapping them on by doing::
-
- # swapon /dev/swapA
- # swapon /dev/swapB
-
-Then node 0 will use the two swap devices in the order of swapA then swapB and
-node 1 will use the two swap devices in the order of swapB then swapA. Note
-that the order of them being swapped on doesn't matter.
-
-A more complex example on a 4 node machine. Assume 6 swap devices are going to
-be swapped on: swapA and swapB are attached to node 0, swapC is attached to
-node 1, swapD and swapE are attached to node 2 and swapF is attached to node3.
-The way to swap them on is the same as above::
-
- # swapon /dev/swapA
- # swapon /dev/swapB
- # swapon /dev/swapC
- # swapon /dev/swapD
- # swapon /dev/swapE
- # swapon /dev/swapF
-
-Then node 0 will use them in the order of::
-
- swapA/swapB -> swapC -> swapD -> swapE -> swapF
-
-swapA and swapB will be used in a round robin mode before any other swap device.
-
-node 1 will use them in the order of::
-
- swapC -> swapA -> swapB -> swapD -> swapE -> swapF
-
-node 2 will use them in the order of::
-
- swapD/swapE -> swapA -> swapB -> swapC -> swapF
-
-Similaly, swapD and swapE will be used in a round robin mode before any
-other swap devices.
-
-node 3 will use them in the order of::
-
- swapF -> swapA -> swapB -> swapC -> swapD -> swapE
-
-
-Implementation details
-======================
-
-The current code uses a priority based list, swap_avail_list, to decide
-which swap device to use and if multiple swap devices share the same
-priority, they are used round robin. This change here replaces the single
-global swap_avail_list with a per-numa-node list, i.e. for each numa node,
-it sees its own priority based list of available swap devices. Swap
-device's priority can be promoted on its matching node's swap_avail_list.
-
-The current swap device's priority is set as: user can set a >=0 value,
-or the system will pick one starting from -1 then downwards. The priority
-value in the swap_avail_list is the negated value of the swap device's
-due to plist being sorted from low to high. The new policy doesn't change
-the semantics for priority >=0 cases, the previous starting from -1 then
-downwards now becomes starting from -2 then downwards and -1 is reserved
-as the promoted value. So if multiple swap devices are attached to the same
-node, they will all be promoted to priority -1 on that node's plist and will
-be used round robin before any other swap devices.
diff --git a/Documentation/admin-guide/mm/transhuge.rst b/Documentation/admin-guide/mm/transhuge.rst
index 04eb45a2f940..5fbc3d89bb07 100644
--- a/Documentation/admin-guide/mm/transhuge.rst
+++ b/Documentation/admin-guide/mm/transhuge.rst
@@ -107,7 +107,7 @@ sysfs
Global THP controls
-------------------
-Transparent Hugepage Support for anonymous memory can be entirely disabled
+Transparent Hugepage Support for anonymous memory can be disabled
(mostly for debugging purposes) or only enabled inside MADV_HUGEPAGE
regions (to avoid the risk of consuming more memory resources) or enabled
system wide. This can be achieved per-supported-THP-size with one of::
@@ -119,6 +119,11 @@ system wide. This can be achieved per-supported-THP-size with one of::
where <size> is the hugepage size being addressed, the available sizes
for which vary by system.
+.. note:: Setting "never" in all sysfs THP controls does **not** disable
+ Transparent Huge Pages globally. This is because ``madvise(...,
+ MADV_COLLAPSE)`` ignores these settings and collapses ranges to
+ PMD-sized huge pages unconditionally.
+
For example::
echo always >/sys/kernel/mm/transparent_hugepage/hugepages-2048kB/enabled
@@ -187,7 +192,9 @@ madvise
behaviour.
never
- should be self-explanatory.
+ should be self-explanatory. Note that ``madvise(...,
+ MADV_COLLAPSE)`` can still cause transparent huge pages to be
+ obtained even if this mode is specified everywhere.
By default kernel tries to use huge, PMD-mappable zero page on read
page fault to anonymous mapping. It's possible to disable huge zero
@@ -202,12 +209,57 @@ PMD-mappable transparent hugepage::
cat /sys/kernel/mm/transparent_hugepage/hpage_pmd_size
-khugepaged will be automatically started when one or more hugepage
-sizes are enabled (either by directly setting "always" or "madvise",
-or by setting "inherit" while the top-level enabled is set to "always"
-or "madvise"), and it'll be automatically shutdown when the last
-hugepage size is disabled (either by directly setting "never", or by
-setting "inherit" while the top-level enabled is set to "never").
+All THPs at fault and collapse time will be added to _deferred_list,
+and will therefore be split under memory presure if they are considered
+"underused". A THP is underused if the number of zero-filled pages in
+the THP is above max_ptes_none (see below). It is possible to disable
+this behaviour by writing 0 to shrink_underused, and enable it by writing
+1 to it::
+
+ echo 0 > /sys/kernel/mm/transparent_hugepage/shrink_underused
+ echo 1 > /sys/kernel/mm/transparent_hugepage/shrink_underused
+
+khugepaged will be automatically started when PMD-sized THP is enabled
+(either of the per-size anon control or the top-level control are set
+to "always" or "madvise"), and it'll be automatically shutdown when
+PMD-sized THP is disabled (when both the per-size anon control and the
+top-level control are "never")
+
+process THP controls
+--------------------
+
+A process can control its own THP behaviour using the ``PR_SET_THP_DISABLE``
+and ``PR_GET_THP_DISABLE`` pair of prctl(2) calls. The THP behaviour set using
+``PR_SET_THP_DISABLE`` is inherited across fork(2) and execve(2). These calls
+support the following arguments::
+
+ prctl(PR_SET_THP_DISABLE, 1, 0, 0, 0):
+ This will disable THPs completely for the process, irrespective
+ of global THP controls or madvise(..., MADV_COLLAPSE) being used.
+
+ prctl(PR_SET_THP_DISABLE, 1, PR_THP_DISABLE_EXCEPT_ADVISED, 0, 0):
+ This will disable THPs for the process except when the usage of THPs is
+ advised. Consequently, THPs will only be used when:
+ - Global THP controls are set to "always" or "madvise" and
+ madvise(..., MADV_HUGEPAGE) or madvise(..., MADV_COLLAPSE) is used.
+ - Global THP controls are set to "never" and madvise(..., MADV_COLLAPSE)
+ is used. This is the same behavior as if THPs would not be disabled on
+ a process level.
+ Note that MADV_COLLAPSE is currently always rejected if
+ madvise(..., MADV_NOHUGEPAGE) is set on an area.
+
+ prctl(PR_SET_THP_DISABLE, 0, 0, 0, 0):
+ This will re-enable THPs for the process, as if they were never disabled.
+ Whether THPs will actually be used depends on global THP controls and
+ madvise() calls.
+
+ prctl(PR_GET_THP_DISABLE, 0, 0, 0, 0):
+ This returns a value whose bits indicate how THP-disable is configured:
+ Bits
+ 1 0 Value Description
+ |0|0| 0 No THP-disable behaviour specified.
+ |0|1| 1 THP is entirely disabled for this process.
+ |1|1| 3 THP-except-advised mode is set for this process.
Khugepaged controls
-------------------
@@ -278,52 +330,130 @@ collapsed, resulting fewer pages being collapsed into
THPs, and lower memory access performance.
``max_ptes_shared`` specifies how many pages can be shared across multiple
-processes. Exceeding the number would block the collapse::
+processes. khugepaged might treat pages of THPs as shared if any page of
+that THP is shared. Exceeding the number would block the collapse::
/sys/kernel/mm/transparent_hugepage/khugepaged/max_ptes_shared
A higher value may increase memory footprint for some workloads.
-Boot parameter
-==============
-
-You can change the sysfs boot time defaults of Transparent Hugepage
-Support by passing the parameter ``transparent_hugepage=always`` or
-``transparent_hugepage=madvise`` or ``transparent_hugepage=never``
-to the kernel command line.
+Boot parameters
+===============
+
+You can change the sysfs boot time default for the top-level "enabled"
+control by passing the parameter ``transparent_hugepage=always`` or
+``transparent_hugepage=madvise`` or ``transparent_hugepage=never`` to the
+kernel command line.
+
+Alternatively, each supported anonymous THP size can be controlled by
+passing ``thp_anon=<size>[KMG],<size>[KMG]:<state>;<size>[KMG]-<size>[KMG]:<state>``,
+where ``<size>`` is the THP size (must be a power of 2 of PAGE_SIZE and
+supported anonymous THP) and ``<state>`` is one of ``always``, ``madvise``,
+``never`` or ``inherit``.
+
+For example, the following will set 16K, 32K, 64K THP to ``always``,
+set 128K, 512K to ``inherit``, set 256K to ``madvise`` and 1M, 2M
+to ``never``::
+
+ thp_anon=16K-64K:always;128K,512K:inherit;256K:madvise;1M-2M:never
+
+``thp_anon=`` may be specified multiple times to configure all THP sizes as
+required. If ``thp_anon=`` is specified at least once, any anon THP sizes
+not explicitly configured on the command line are implicitly set to
+``never``.
+
+``transparent_hugepage`` setting only affects the global toggle. If
+``thp_anon`` is not specified, PMD_ORDER THP will default to ``inherit``.
+However, if a valid ``thp_anon`` setting is provided by the user, the
+PMD_ORDER THP policy will be overridden. If the policy for PMD_ORDER
+is not defined within a valid ``thp_anon``, its policy will default to
+``never``.
+
+Similarly to ``transparent_hugepage``, you can control the hugepage
+allocation policy for the internal shmem mount by using the kernel parameter
+``transparent_hugepage_shmem=<policy>``, where ``<policy>`` is one of the
+seven valid policies for shmem (``always``, ``within_size``, ``advise``,
+``never``, ``deny``, and ``force``).
+
+Similarly to ``transparent_hugepage_shmem``, you can control the default
+hugepage allocation policy for the tmpfs mount by using the kernel parameter
+``transparent_hugepage_tmpfs=<policy>``, where ``<policy>`` is one of the
+four valid policies for tmpfs (``always``, ``within_size``, ``advise``,
+``never``). The tmpfs mount default policy is ``never``.
+
+Additionally, Kconfig options are available to set the default hugepage
+policies for shmem (``CONFIG_TRANSPARENT_HUGEPAGE_SHMEM_HUGE_*``) and tmpfs
+(``CONFIG_TRANSPARENT_HUGEPAGE_TMPFS_HUGE_*``) at build time. Refer to the
+Kconfig help for more details.
+
+In the same manner as ``thp_anon`` controls each supported anonymous THP
+size, ``thp_shmem`` controls each supported shmem THP size. ``thp_shmem``
+has the same format as ``thp_anon``, but also supports the policy
+``within_size``.
+
+``thp_shmem=`` may be specified multiple times to configure all THP sizes
+as required. If ``thp_shmem=`` is specified at least once, any shmem THP
+sizes not explicitly configured on the command line are implicitly set to
+``never``.
+
+``transparent_hugepage_shmem`` setting only affects the global toggle. If
+``thp_shmem`` is not specified, PMD_ORDER hugepage will default to
+``inherit``. However, if a valid ``thp_shmem`` setting is provided by the
+user, the PMD_ORDER hugepage policy will be overridden. If the policy for
+PMD_ORDER is not defined within a valid ``thp_shmem``, its policy will
+default to ``never``.
Hugepages in tmpfs/shmem
========================
-You can control hugepage allocation policy in tmpfs with mount option
-``huge=``. It can have following values:
+Traditionally, tmpfs only supported a single huge page size ("PMD"). Today,
+it also supports smaller sizes just like anonymous memory, often referred
+to as "multi-size THP" (mTHP). Huge pages of any size are commonly
+represented in the kernel as "large folios".
+
+While there is fine control over the huge page sizes to use for the internal
+shmem mount (see below), ordinary tmpfs mounts will make use of all available
+huge page sizes without any control over the exact sizes, behaving more like
+other file systems.
+
+tmpfs mounts
+------------
+
+The THP allocation policy for tmpfs mounts can be adjusted using the mount
+option: ``huge=``. It can have following values:
always
Attempt to allocate huge pages every time we need a new page;
+ Always try PMD-sized huge pages first, and fall back to smaller-sized
+ huge pages if the PMD-sized huge page allocation fails;
never
- Do not allocate huge pages;
+ Do not allocate huge pages. Note that ``madvise(..., MADV_COLLAPSE)``
+ can still cause transparent huge pages to be obtained even if this mode
+ is specified everywhere;
within_size
- Only allocate huge page if it will be fully within i_size.
- Also respect fadvise()/madvise() hints;
+ Only allocate huge page if it will be fully within i_size;
+ Always try PMD-sized huge pages first, and fall back to smaller-sized
+ huge pages if the PMD-sized huge page allocation fails;
+ Also respect madvise() hints;
advise
- Only allocate huge pages if requested with fadvise()/madvise();
+ Only allocate huge pages if requested with madvise();
+
+Remember, that the kernel may use huge pages of all available sizes, and
+that no fine control as for the internal tmpfs mount is available.
-The default policy is ``never``.
+The default policy in the past was ``never``, but it can now be adjusted
+using the kernel parameter ``transparent_hugepage_tmpfs=<policy>``.
``mount -o remount,huge= /mountpoint`` works fine after mount: remounting
``huge=never`` will not attempt to break up huge pages at all, just stop more
from being allocated.
-There's also sysfs knob to control hugepage allocation policy for internal
-shmem mount: /sys/kernel/mm/transparent_hugepage/shmem_enabled. The mount
-is used for SysV SHM, memfds, shared anonymous mmaps (of /dev/zero or
-MAP_ANONYMOUS), GPU drivers' DRM objects, Ashmem.
-
-In addition to policies listed above, shmem_enabled allows two further
-values:
+In addition to policies listed above, the sysfs knob
+/sys/kernel/mm/transparent_hugepage/shmem_enabled will affect the
+allocation policy of tmpfs mounts, when set to the following values:
deny
For use in emergencies, to force the huge option off from
@@ -331,6 +461,44 @@ deny
force
Force the huge option on for all - very useful for testing;
+shmem / internal tmpfs
+----------------------
+The mount internal tmpfs mount is used for SysV SHM, memfds, shared anonymous
+mmaps (of /dev/zero or MAP_ANONYMOUS), GPU drivers' DRM objects, Ashmem.
+
+To control the THP allocation policy for this internal tmpfs mount, the
+sysfs knob /sys/kernel/mm/transparent_hugepage/shmem_enabled and the knobs
+per THP size in
+'/sys/kernel/mm/transparent_hugepage/hugepages-<size>kB/shmem_enabled'
+can be used.
+
+The global knob has the same semantics as the ``huge=`` mount options
+for tmpfs mounts, except that the different huge page sizes can be controlled
+individually, and will only use the setting of the global knob when the
+per-size knob is set to 'inherit'.
+
+The options 'force' and 'deny' are dropped for the individual sizes, which
+are rather testing artifacts from the old ages.
+
+always
+ Attempt to allocate <size> huge pages every time we need a new page;
+
+inherit
+ Inherit the top-level "shmem_enabled" value. By default, PMD-sized hugepages
+ have enabled="inherit" and all other hugepage sizes have enabled="never";
+
+never
+ Do not allocate <size> huge pages. Note that ``madvise(...,
+ MADV_COLLAPSE)`` can still cause transparent huge pages to be obtained
+ even if this mode is specified everywhere;
+
+within_size
+ Only allocate <size> huge page if it will be fully within i_size.
+ Also respect madvise() hints;
+
+advise
+ Only allocate <size> huge pages if requested with madvise();
+
Need of application restart
===========================
@@ -343,10 +511,6 @@ also applies to the regions registered in khugepaged.
Monitoring usage
================
-.. note::
- Currently the below counters only record events relating to
- PMD-sized THP. Events relating to other THP sizes are not included.
-
The number of PMD-sized anonymous transparent huge pages currently used by the
system is available by reading the AnonHugePages field in ``/proc/meminfo``.
To identify what applications are using PMD-sized anonymous transparent huge
@@ -358,7 +522,7 @@ AnonHugePmdMapped).
The number of file transparent huge pages mapped to userspace is available
by reading ShmemPmdMapped and ShmemHugePages fields in ``/proc/meminfo``.
To identify what applications are mapping file transparent huge pages, it
-is necessary to read ``/proc/PID/smaps`` and count the FileHugeMapped fields
+is necessary to read ``/proc/PID/smaps`` and count the FilePmdMapped fields
for each mapping.
Note that reading the smaps file is expensive and reading it
@@ -369,7 +533,7 @@ monitor how successfully the system is providing huge pages for use.
thp_fault_alloc
is incremented every time a huge page is successfully
- allocated to handle a page fault.
+ allocated and charged to handle a page fault.
thp_collapse_alloc
is incremented by khugepaged when it has found
@@ -377,7 +541,7 @@ thp_collapse_alloc
successfully allocated a new huge page to store the data.
thp_fault_fallback
- is incremented if a page fault fails to allocate
+ is incremented if a page fault fails to allocate or charge
a huge page and instead falls back to using small pages.
thp_fault_fallback_charge
@@ -391,20 +555,23 @@ thp_collapse_alloc_failed
the allocation.
thp_file_alloc
- is incremented every time a file huge page is successfully
- allocated.
+ is incremented every time a shmem huge page is successfully
+ allocated (Note that despite being named after "file", the counter
+ measures only shmem).
thp_file_fallback
- is incremented if a file huge page is attempted to be allocated
- but fails and instead falls back to using small pages.
+ is incremented if a shmem huge page is attempted to be allocated
+ but fails and instead falls back to using small pages. (Note that
+ despite being named after "file", the counter measures only shmem).
thp_file_fallback_charge
- is incremented if a file huge page cannot be charged and instead
+ is incremented if a shmem huge page cannot be charged and instead
falls back to using small pages even though the allocation was
- successful.
+ successful. (Note that despite being named after "file", the
+ counter measures only shmem).
thp_file_mapped
- is incremented every time a file huge page is mapped into
+ is incremented every time a file or shmem huge page is mapped into
user address space.
thp_split_page
@@ -423,6 +590,12 @@ thp_deferred_split_page
splitting it would free up some memory. Pages on split queue are
going to be split under memory pressure.
+thp_underused_split_page
+ is incremented when a huge page on the split queue was split
+ because it was underused. A THP is underused if the number of
+ zero pages in the THP is above a certain threshold
+ (/sys/kernel/mm/transparent_hugepage/khugepaged/max_ptes_none).
+
thp_split_pmd
is incremented every time a PMD split into table of PTEs.
This can happen, for instance, when application calls mprotect() or
@@ -447,6 +620,92 @@ thp_swpout_fallback
Usually because failed to allocate some continuous swap space
for the huge page.
+In /sys/kernel/mm/transparent_hugepage/hugepages-<size>kB/stats, There are
+also individual counters for each huge page size, which can be utilized to
+monitor the system's effectiveness in providing huge pages for usage. Each
+counter has its own corresponding file.
+
+anon_fault_alloc
+ is incremented every time a huge page is successfully
+ allocated and charged to handle a page fault.
+
+anon_fault_fallback
+ is incremented if a page fault fails to allocate or charge
+ a huge page and instead falls back to using huge pages with
+ lower orders or small pages.
+
+anon_fault_fallback_charge
+ is incremented if a page fault fails to charge a huge page and
+ instead falls back to using huge pages with lower orders or
+ small pages even though the allocation was successful.
+
+zswpout
+ is incremented every time a huge page is swapped out to zswap in one
+ piece without splitting.
+
+swpin
+ is incremented every time a huge page is swapped in from a non-zswap
+ swap device in one piece.
+
+swpin_fallback
+ is incremented if swapin fails to allocate or charge a huge page
+ and instead falls back to using huge pages with lower orders or
+ small pages.
+
+swpin_fallback_charge
+ is incremented if swapin fails to charge a huge page and instead
+ falls back to using huge pages with lower orders or small pages
+ even though the allocation was successful.
+
+swpout
+ is incremented every time a huge page is swapped out to a non-zswap
+ swap device in one piece without splitting.
+
+swpout_fallback
+ is incremented if a huge page has to be split before swapout.
+ Usually because failed to allocate some continuous swap space
+ for the huge page.
+
+shmem_alloc
+ is incremented every time a shmem huge page is successfully
+ allocated.
+
+shmem_fallback
+ is incremented if a shmem huge page is attempted to be allocated
+ but fails and instead falls back to using small pages.
+
+shmem_fallback_charge
+ is incremented if a shmem huge page cannot be charged and instead
+ falls back to using small pages even though the allocation was
+ successful.
+
+split
+ is incremented every time a huge page is successfully split into
+ smaller orders. This can happen for a variety of reasons but a
+ common reason is that a huge page is old and is being reclaimed.
+
+split_failed
+ is incremented if kernel fails to split huge
+ page. This can happen if the page was pinned by somebody.
+
+split_deferred
+ is incremented when a huge page is put onto split queue.
+ This happens when a huge page is partially unmapped and splitting
+ it would free up some memory. Pages on split queue are going to
+ be split under memory pressure, if splitting is possible.
+
+nr_anon
+ the number of anonymous THP we have in the whole system. These THPs
+ might be currently entirely mapped or have partially unmapped/unused
+ subpages.
+
+nr_anon_partially_mapped
+ the number of anonymous THP which are likely partially mapped, possibly
+ wasting memory, and have been queued for deferred memory reclamation.
+ Note that in corner some cases (e.g., failed migration), we might detect
+ an anonymous THP as "partially mapped" and count it here, even though it
+ is not actually partially mapped anymore.
+
As the system ages, allocating huge pages may be expensive as the
system uses memory compaction to copy data around memory to free a
huge page for use. There are some counters in ``/proc/vmstat`` to help
diff --git a/Documentation/admin-guide/mm/zswap.rst b/Documentation/admin-guide/mm/zswap.rst
index b42132969e31..2464425c783d 100644
--- a/Documentation/admin-guide/mm/zswap.rst
+++ b/Documentation/admin-guide/mm/zswap.rst
@@ -53,28 +53,17 @@ Zswap receives pages for compression from the swap subsystem and is able to
evict pages from its own compressed pool on an LRU basis and write them back to
the backing swap device in the case that the compressed pool is full.
-Zswap makes use of zpool for the managing the compressed memory pool. Each
-allocation in zpool is not directly accessible by address. Rather, a handle is
+Zswap makes use of zsmalloc for the managing the compressed memory pool. Each
+allocation in zsmalloc is not directly accessible by address. Rather, a handle is
returned by the allocation routine and that handle must be mapped before being
accessed. The compressed memory pool grows on demand and shrinks as compressed
-pages are freed. The pool is not preallocated. By default, a zpool
-of type selected in ``CONFIG_ZSWAP_ZPOOL_DEFAULT`` Kconfig option is created,
-but it can be overridden at boot time by setting the ``zpool`` attribute,
-e.g. ``zswap.zpool=zbud``. It can also be changed at runtime using the sysfs
-``zpool`` attribute, e.g.::
+pages are freed. The pool is not preallocated.
- echo zbud > /sys/module/zswap/parameters/zpool
-
-The zbud type zpool allocates exactly 1 page to store 2 compressed pages, which
-means the compression ratio will always be 2:1 or worse (because of half-full
-zbud pages). The zsmalloc type zpool has a more complex compressed page
-storage method, and it can achieve greater storage densities.
-
-When a swap page is passed from swapout to zswap, zswap maintains a mapping
-of the swap entry, a combination of the swap type and swap offset, to the zpool
+When a swap page is passed from swapout to zswap, zswap maintains a mapping of
+the swap entry, a combination of the swap type and swap offset, to the zsmalloc
handle that references that compressed swap page. This mapping is achieved
-with a red-black tree per swap type. The swap offset is the search key for the
-tree nodes.
+with an xarray per swap type. The swap offset is the search key for the xarray
+nodes.
During a page fault on a PTE that is a swap entry, the swapin code calls the
zswap load function to decompress the page into the page allocated by the page
@@ -98,11 +87,11 @@ attribute, e.g.::
echo lzo > /sys/module/zswap/parameters/compressor
-When the zpool and/or compressor parameter is changed at runtime, any existing
-compressed pages are not modified; they are left in their own zpool. When a
-request is made for a page in an old zpool, it is uncompressed using its
-original compressor. Once all pages are removed from an old zpool, the zpool
-and its compressor are freed.
+When the compressor parameter is changed at runtime, any existing compressed
+pages are not modified; they are left in their own pool. When a request is
+made for a page in an old pool, it is uncompressed using its original
+compressor. Once all pages are removed from an old pool, the pool and its
+compressor are freed.
Some of the pages in zswap are same-value filled pages (i.e. contents of the
page have same value or repetitive pattern). These pages include zero-filled
@@ -111,35 +100,6 @@ checked if it is a same-value filled page before compressing it. If true, the
compressed length of the page is set to zero and the pattern or same-filled
value is stored.
-Same-value filled pages identification feature is enabled by default and can be
-disabled at boot time by setting the ``same_filled_pages_enabled`` attribute
-to 0, e.g. ``zswap.same_filled_pages_enabled=0``. It can also be enabled and
-disabled at runtime using the sysfs ``same_filled_pages_enabled``
-attribute, e.g.::
-
- echo 1 > /sys/module/zswap/parameters/same_filled_pages_enabled
-
-When zswap same-filled page identification is disabled at runtime, it will stop
-checking for the same-value filled pages during store operation.
-In other words, every page will be then considered non-same-value filled.
-However, the existing pages which are marked as same-value filled pages remain
-stored unchanged in zswap until they are either loaded or invalidated.
-
-In some circumstances it might be advantageous to make use of just the zswap
-ability to efficiently store same-filled pages without enabling the whole
-compressed page storage.
-In this case the handling of non-same-value pages by zswap (enabled by default)
-can be disabled by setting the ``non_same_filled_pages_enabled`` attribute
-to 0, e.g. ``zswap.non_same_filled_pages_enabled=0``.
-It can also be enabled and disabled at runtime using the sysfs
-``non_same_filled_pages_enabled`` attribute, e.g.::
-
- echo 1 > /sys/module/zswap/parameters/non_same_filled_pages_enabled
-
-Disabling both ``zswap.same_filled_pages_enabled`` and
-``zswap.non_same_filled_pages_enabled`` effectively disables accepting any new
-pages by zswap.
-
To prevent zswap from shrinking pool when zswap is full and there's a high
pressure on swap (this will result in flipping pages in and out zswap pool
without any real benefit but with a performance drop for the system), a
@@ -155,7 +115,7 @@ Setting this parameter to 100 will disable the hysteresis.
Some users cannot tolerate the swapping that comes with zswap store failures
and zswap writebacks. Swapping can be disabled entirely (without disabling
-zswap itself) on a cgroup-basis as follows:
+zswap itself) on a cgroup-basis as follows::
echo 0 > /sys/fs/cgroup/<cgroup-name>/memory.zswap.writeback
@@ -166,7 +126,7 @@ writeback (because the same pages might be rejected again and again).
When there is a sizable amount of cold memory residing in the zswap pool, it
can be advantageous to proactively write these cold pages to swap and reclaim
the memory for other use cases. By default, the zswap shrinker is disabled.
-User can enable it as follows:
+User can enable it as follows::
echo Y > /sys/module/zswap/parameters/shrinker_enabled
diff --git a/Documentation/admin-guide/namespaces/resource-control.rst b/Documentation/admin-guide/namespaces/resource-control.rst
index 369556e00f0c..553a44803231 100644
--- a/Documentation/admin-guide/namespaces/resource-control.rst
+++ b/Documentation/admin-guide/namespaces/resource-control.rst
@@ -1,17 +1,17 @@
-===========================
-Namespaces research control
-===========================
+====================================
+User namespaces and resource control
+====================================
-There are a lot of kinds of objects in the kernel that don't have
-individual limits or that have limits that are ineffective when a set
-of processes is allowed to switch user ids. With user namespaces
-enabled in a kernel for people who don't trust their users or their
-users programs to play nice this problems becomes more acute.
+The kernel contains many kinds of objects that either don't have
+individual limits or that have limits which are ineffective when
+a set of processes is allowed to switch their UID. On a system
+where the admins don't trust their users or their users' programs,
+user namespaces expose the system to potential misuse of resources.
-Therefore it is recommended that memory control groups be enabled in
-kernels that enable user namespaces, and it is further recommended
-that userspace configure memory control groups to limit how much
-memory user's they don't trust to play nice can use.
+In order to mitigate this, we recommend that admins enable memory
+control groups on any system that enables user namespaces.
+Furthermore, we recommend that admins configure the memory control
+groups to limit the maximum memory usable by any untrusted user.
Memory control groups can be configured by installing the libcgroup
package present on most distros editing /etc/cgrules.conf,
diff --git a/Documentation/admin-guide/nfs/nfsroot.rst b/Documentation/admin-guide/nfs/nfsroot.rst
index 135218f33394..06990309c6ff 100644
--- a/Documentation/admin-guide/nfs/nfsroot.rst
+++ b/Documentation/admin-guide/nfs/nfsroot.rst
@@ -342,7 +342,7 @@ They depend on various facilities being available:
When using pxelinux, the kernel image is specified using
"kernel <relative-path-below /tftpboot>". The nfsroot parameters
are passed to the kernel by adding them to the "append" line.
- It is common to use serial console in conjunction with pxeliunx,
+ It is common to use serial console in conjunction with pxelinux,
see Documentation/admin-guide/serial-console.rst for more information.
For more information on isolinux, including how to create bootdisks
diff --git a/Documentation/admin-guide/nvme-multipath.rst b/Documentation/admin-guide/nvme-multipath.rst
new file mode 100644
index 000000000000..97ca1ccef459
--- /dev/null
+++ b/Documentation/admin-guide/nvme-multipath.rst
@@ -0,0 +1,72 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+====================
+Linux NVMe multipath
+====================
+
+This document describes NVMe multipath and its path selection policies supported
+by the Linux NVMe host driver.
+
+
+Introduction
+============
+
+The NVMe multipath feature in Linux integrates namespaces with the same
+identifier into a single block device. Using multipath enhances the reliability
+and stability of I/O access while improving bandwidth performance. When a user
+sends I/O to this merged block device, the multipath mechanism selects one of
+the underlying block devices (paths) according to the configured policy.
+Different policies result in different path selections.
+
+
+Policies
+========
+
+All policies follow the ANA (Asymmetric Namespace Access) mechanism, meaning
+that when an optimized path is available, it will be chosen over a non-optimized
+one. Current the NVMe multipath policies include numa(default), round-robin and
+queue-depth.
+
+To set the desired policy (e.g., round-robin), use one of the following methods:
+ 1. echo -n "round-robin" > /sys/module/nvme_core/parameters/iopolicy
+ 2. or add the "nvme_core.iopolicy=round-robin" to cmdline.
+
+
+NUMA
+----
+
+The NUMA policy selects the path closest to the NUMA node of the current CPU for
+I/O distribution. This policy maintains the nearest paths to each NUMA node
+based on network interface connections.
+
+When to use the NUMA policy:
+ 1. Multi-core Systems: Optimizes memory access in multi-core and
+ multi-processor systems, especially under NUMA architecture.
+ 2. High Affinity Workloads: Binds I/O processing to the CPU to reduce
+ communication and data transfer delays across nodes.
+
+
+Round-Robin
+-----------
+
+The round-robin policy distributes I/O requests evenly across all paths to
+enhance throughput and resource utilization. Each I/O operation is sent to the
+next path in sequence.
+
+When to use the round-robin policy:
+ 1. Balanced Workloads: Effective for balanced and predictable workloads with
+ similar I/O size and type.
+ 2. Homogeneous Path Performance: Utilizes all paths efficiently when
+ performance characteristics (e.g., latency, bandwidth) are similar.
+
+
+Queue-Depth
+-----------
+
+The queue-depth policy manages I/O requests based on the current queue depth
+of each path, selecting the path with the least number of in-flight I/Os.
+
+When to use the queue-depth policy:
+ 1. High load with small I/Os: Effectively balances load across paths when
+ the load is high, and I/O operations consist of small, relatively
+ fixed-sized requests.
diff --git a/Documentation/admin-guide/perf/arm-ni.rst b/Documentation/admin-guide/perf/arm-ni.rst
new file mode 100644
index 000000000000..d26a8f697c36
--- /dev/null
+++ b/Documentation/admin-guide/perf/arm-ni.rst
@@ -0,0 +1,17 @@
+====================================
+Arm Network-on Chip Interconnect PMU
+====================================
+
+NI-700 and friends implement a distinct PMU for each clock domain within the
+interconnect. Correspondingly, the driver exposes multiple PMU devices named
+arm_ni_<x>_cd_<y>, where <x> is an (arbitrary) instance identifier and <y> is
+the clock domain ID within that particular instance. If multiple NI instances
+exist within a system, the PMU devices can be correlated with the underlying
+hardware instance via sysfs parentage.
+
+Each PMU exposes base event aliases for the interface types present in its clock
+domain. These require qualifying with the "eventid" and "nodeid" parameters
+to specify the event code to count and the interface at which to count it
+(per the configured hardware ID as reflected in the xxNI_NODE_INFO register).
+The exception is the "cycles" alias for the PMU cycle counter, which is encoded
+with the PMU node type and needs no further qualification.
diff --git a/Documentation/admin-guide/perf/dwc_pcie_pmu.rst b/Documentation/admin-guide/perf/dwc_pcie_pmu.rst
index d47cd229d710..167f9281fbf5 100644
--- a/Documentation/admin-guide/perf/dwc_pcie_pmu.rst
+++ b/Documentation/admin-guide/perf/dwc_pcie_pmu.rst
@@ -16,8 +16,8 @@ provides the following two features:
- one 64-bit counter for Time Based Analysis (RX/TX data throughput and
time spent in each low-power LTSSM state) and
-- one 32-bit counter for Event Counting (error and non-error events for
- a specified lane)
+- one 32-bit counter per event for Event Counting (error and non-error
+ events for a specified lane)
Note: There is no interrupt for counter overflow.
@@ -46,41 +46,41 @@ Some of the events only exist for specific configurations.
DesignWare Cores (DWC) PCIe PMU Driver
=======================================
-This driver adds PMU devices for each PCIe Root Port named based on the BDF of
+This driver adds PMU devices for each PCIe Root Port named based on the SBDF of
the Root Port. For example,
- 30:03.0 PCI bridge: Device 1ded:8000 (rev 01)
+ 0001:30:03.0 PCI bridge: Device 1ded:8000 (rev 01)
-the PMU device name for this Root Port is dwc_rootport_3018.
+the PMU device name for this Root Port is dwc_rootport_13018.
The DWC PCIe PMU driver registers a perf PMU driver, which provides
description of available events and configuration options in sysfs, see
-/sys/bus/event_source/devices/dwc_rootport_{bdf}.
+/sys/bus/event_source/devices/dwc_rootport_{sbdf}.
The "format" directory describes format of the config fields of the
perf_event_attr structure. The "events" directory provides configuration
templates for all documented events. For example,
-"Rx_PCIe_TLP_Data_Payload" is an equivalent of "eventid=0x22,type=0x1".
+"rx_pcie_tlp_data_payload" is an equivalent of "eventid=0x21,type=0x0".
The "perf list" command shall list the available events from sysfs, e.g.::
$# perf list | grep dwc_rootport
<...>
- dwc_rootport_3018/Rx_PCIe_TLP_Data_Payload/ [Kernel PMU event]
+ dwc_rootport_13018/Rx_PCIe_TLP_Data_Payload/ [Kernel PMU event]
<...>
- dwc_rootport_3018/rx_memory_read,lane=?/ [Kernel PMU event]
+ dwc_rootport_13018/rx_memory_read,lane=?/ [Kernel PMU event]
Time Based Analysis Event Usage
-------------------------------
Example usage of counting PCIe RX TLP data payload (Units of bytes)::
- $# perf stat -a -e dwc_rootport_3018/Rx_PCIe_TLP_Data_Payload/
+ $# perf stat -a -e dwc_rootport_13018/Rx_PCIe_TLP_Data_Payload/
The average RX/TX bandwidth can be calculated using the following formula:
- PCIe RX Bandwidth = Rx_PCIe_TLP_Data_Payload / Measure_Time_Window
- PCIe TX Bandwidth = Tx_PCIe_TLP_Data_Payload / Measure_Time_Window
+ PCIe RX Bandwidth = rx_pcie_tlp_data_payload / Measure_Time_Window
+ PCIe TX Bandwidth = tx_pcie_tlp_data_payload / Measure_Time_Window
Lane Event Usage
-------------------------------
@@ -88,7 +88,7 @@ Lane Event Usage
Each lane has the same event set and to avoid generating a list of hundreds
of events, the user need to specify the lane ID explicitly, e.g.::
- $# perf stat -a -e dwc_rootport_3018/rx_memory_read,lane=4/
+ $# perf stat -a -e dwc_rootport_13018/rx_memory_read,lane=4/
The driver does not support sampling, therefore "perf record" will not
work. Per-task (without "-a") perf sessions are not supported.
diff --git a/Documentation/admin-guide/perf/fujitsu_uncore_pmu.rst b/Documentation/admin-guide/perf/fujitsu_uncore_pmu.rst
new file mode 100644
index 000000000000..2ec0249e37b6
--- /dev/null
+++ b/Documentation/admin-guide/perf/fujitsu_uncore_pmu.rst
@@ -0,0 +1,115 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+
+================================================
+Fujitsu Uncore Performance Monitoring Unit (PMU)
+================================================
+
+This driver supports the Uncore MAC PMUs and the Uncore PCI PMUs found
+in Fujitsu chips.
+Each MAC PMU on these chips is exposed as a uncore perf PMU with device name
+mac_iod<iod>_mac<mac>_ch<ch>.
+And each PCI PMU on these chips is exposed as a uncore perf PMU with device name
+pci_iod<iod>_pci<pci>.
+
+The driver provides a description of its available events and configuration
+options in sysfs, see /sys/bus/event_sources/devices/mac_iod<iod>_mac<mac>_ch<ch>/
+and /sys/bus/event_sources/devices/pci_iod<iod>_pci<pci>/.
+This driver exports:
+
+- formats, used by perf user space and other tools to configure events
+- events, used by perf user space and other tools to create events
+ symbolically, e.g.::
+
+ perf stat -a -e mac_iod0_mac0_ch0/event=0x21/ ls
+ perf stat -a -e pci_iod0_pci0/event=0x24/ ls
+
+- cpumask, used by perf user space and other tools to know on which CPUs
+ to open the events
+
+This driver supports the following events for MAC:
+
+- cycles
+ This event counts MAC cycles at MAC frequency.
+- read-count
+ This event counts the number of read requests to MAC.
+- read-count-request
+ This event counts the number of read requests including retry to MAC.
+- read-count-return
+ This event counts the number of responses to read requests to MAC.
+- read-count-request-pftgt
+ This event counts the number of read requests including retry with PFTGT
+ flag.
+- read-count-request-normal
+ This event counts the number of read requests including retry without PFTGT
+ flag.
+- read-count-return-pftgt-hit
+ This event counts the number of responses to read requests which hit the
+ PFTGT buffer.
+- read-count-return-pftgt-miss
+ This event counts the number of responses to read requests which miss the
+ PFTGT buffer.
+- read-wait
+ This event counts outstanding read requests issued by DDR memory controller
+ per cycle.
+- write-count
+ This event counts the number of write requests to MAC (including zero write,
+ full write, partial write, write cancel).
+- write-count-write
+ This event counts the number of full write requests to MAC (not including
+ zero write).
+- write-count-pwrite
+ This event counts the number of partial write requests to MAC.
+- memory-read-count
+ This event counts the number of read requests from MAC to memory.
+- memory-write-count
+ This event counts the number of full write requests from MAC to memory.
+- memory-pwrite-count
+ This event counts the number of partial write requests from MAC to memory.
+- ea-mac
+ This event counts energy consumption of MAC.
+- ea-memory
+ This event counts energy consumption of memory.
+- ea-memory-mac-write
+ This event counts the number of write requests from MAC to memory.
+- ea-ha
+ This event counts energy consumption of HA.
+
+ 'ea' is the abbreviation for 'Energy Analyzer'.
+
+Examples for use with perf::
+
+ perf stat -e mac_iod0_mac0_ch0/ea-mac/ ls
+
+And, this driver supports the following events for PCI:
+
+- pci-port0-cycles
+ This event counts PCI cycles at PCI frequency in port0.
+- pci-port0-read-count
+ This event counts read transactions for data transfer in port0.
+- pci-port0-read-count-bus
+ This event counts read transactions for bus usage in port0.
+- pci-port0-write-count
+ This event counts write transactions for data transfer in port0.
+- pci-port0-write-count-bus
+ This event counts write transactions for bus usage in port0.
+- pci-port1-cycles
+ This event counts PCI cycles at PCI frequency in port1.
+- pci-port1-read-count
+ This event counts read transactions for data transfer in port1.
+- pci-port1-read-count-bus
+ This event counts read transactions for bus usage in port1.
+- pci-port1-write-count
+ This event counts write transactions for data transfer in port1.
+- pci-port1-write-count-bus
+ This event counts write transactions for bus usage in port1.
+- ea-pci
+ This event counts energy consumption of PCI.
+
+ 'ea' is the abbreviation for 'Energy Analyzer'.
+
+Examples for use with perf::
+
+ perf stat -e pci_iod0_pci0/ea-pci/ ls
+
+Given that these are uncore PMUs the driver does not support sampling, therefore
+"perf record" will not work. Per-task perf sessions are not supported.
diff --git a/Documentation/admin-guide/perf/hisi-pcie-pmu.rst b/Documentation/admin-guide/perf/hisi-pcie-pmu.rst
index 7e863662e2d4..083ca50de896 100644
--- a/Documentation/admin-guide/perf/hisi-pcie-pmu.rst
+++ b/Documentation/admin-guide/perf/hisi-pcie-pmu.rst
@@ -28,7 +28,9 @@ The "identifier" sysfs file allows users to identify the version of the
PMU hardware device.
The "bus" sysfs file allows users to get the bus number of Root Ports
-monitored by PMU.
+monitored by PMU. Furthermore users can get the Root Ports range in
+[bdf_min, bdf_max] from "bdf_min" and "bdf_max" sysfs attributes
+respectively.
Example usage of perf::
@@ -37,9 +39,21 @@ Example usage of perf::
hisi_pcie0_core0/rx_mwr_cnt/ [kernel PMU event]
------------------------------------------
- $# perf stat -e hisi_pcie0_core0/rx_mwr_latency/
- $# perf stat -e hisi_pcie0_core0/rx_mwr_cnt/
- $# perf stat -g -e hisi_pcie0_core0/rx_mwr_latency/ -e hisi_pcie0_core0/rx_mwr_cnt/
+ $# perf stat -e hisi_pcie0_core0/rx_mwr_latency,port=0xffff/
+ $# perf stat -e hisi_pcie0_core0/rx_mwr_cnt,port=0xffff/
+
+The related events usually used to calculate the bandwidth, latency or others.
+They need to start and end counting at the same time, therefore related events
+are best used in the same event group to get the expected value. There are two
+ways to know if they are related events:
+
+a) By event name, such as the latency events "xxx_latency, xxx_cnt" or
+ bandwidth events "xxx_flux, xxx_time".
+b) By event type, such as "event=0xXXXX, event=0x1XXXX".
+
+Example usage of perf group::
+
+ $# perf stat -e "{hisi_pcie0_core0/rx_mwr_latency,port=0xffff/,hisi_pcie0_core0/rx_mwr_cnt,port=0xffff/}"
The current driver does not support sampling. So "perf record" is unsupported.
Also attach to a task is unsupported for PCIe PMU.
@@ -51,8 +65,12 @@ Filter options
PMU could only monitor the performance of traffic downstream target Root
Ports or downstream target Endpoint. PCIe PMU driver support "port" and
- "bdf" interfaces for users, and these two interfaces aren't supported at the
- same time.
+ "bdf" interfaces for users.
+ Please notice that, one of these two interfaces must be set, and these two
+ interfaces aren't supported at the same time. If they are both set, only
+ "port" filter is valid.
+ If "port" filter not being set or is set explicitly to zero (default), the
+ "bdf" filter will be in effect, because "bdf=0" meaning 0000:000:00.0.
- port
@@ -95,7 +113,7 @@ Filter options
Example usage of perf::
- $# perf stat -e hisi_pcie0_core0/rx_mrd_flux,trig_len=0x4,trig_mode=1/ sleep 5
+ $# perf stat -e hisi_pcie0_core0/rx_mrd_flux,port=0xffff,trig_len=0x4,trig_mode=1/ sleep 5
3. Threshold filter
@@ -109,7 +127,7 @@ Filter options
Example usage of perf::
- $# perf stat -e hisi_pcie0_core0/rx_mrd_flux,thr_len=0x4,thr_mode=1/ sleep 5
+ $# perf stat -e hisi_pcie0_core0/rx_mrd_flux,port=0xffff,thr_len=0x4,thr_mode=1/ sleep 5
4. TLP Length filter
@@ -127,4 +145,4 @@ Filter options
Example usage of perf::
- $# perf stat -e hisi_pcie0_core0/rx_mrd_flux,len_mode=0x1/ sleep 5
+ $# perf stat -e hisi_pcie0_core0/rx_mrd_flux,port=0xffff,len_mode=0x1/ sleep 5
diff --git a/Documentation/admin-guide/perf/hisi-pmu.rst b/Documentation/admin-guide/perf/hisi-pmu.rst
index e0174d20809a..d56b2d690709 100644
--- a/Documentation/admin-guide/perf/hisi-pmu.rst
+++ b/Documentation/admin-guide/perf/hisi-pmu.rst
@@ -18,10 +18,10 @@ HiSilicon SoC uncore PMU driver
Each device PMU has separate registers for event counting, control and
interrupt, and the PMU driver shall register perf PMU drivers like L3C,
HHA and DDRC etc. The available events and configuration options shall
-be described in the sysfs, see:
+be described in the sysfs, see::
+
+/sys/bus/event_source/devices/hisi_sccl{X}_<l3c{Y}/hha{Y}/ddrc{Y}>
-/sys/devices/hisi_sccl{X}_<l3c{Y}/hha{Y}/ddrc{Y}>/, or
-/sys/bus/event_source/devices/hisi_sccl{X}_<l3c{Y}/hha{Y}/ddrc{Y}>.
The "perf list" command shall list the available events from sysfs.
Each L3C, HHA and DDRC is registered as a separate PMU with perf. The PMU
@@ -36,7 +36,10 @@ e.g. hisi_sccl1_hha0/rx_operations is RX_OPERATIONS event of HHA index #0 in
SCCL ID #1.
The driver also provides a "cpumask" sysfs attribute, which shows the CPU core
-ID used to count the uncore PMU event.
+ID used to count the uncore PMU event. An "associated_cpus" sysfs attribute is
+also provided to show the CPUs associated with this PMU. The "cpumask" indicates
+the CPUs to open the events, usually as a hint for userspaces tools like perf.
+It only contains one associated CPU from the "associated_cpus".
Example usage of perf::
@@ -63,6 +66,10 @@ specified as a bitmap::
This will only count the operations from core/thread 0 and 1 in this cluster.
+User should not use tt_core_deprecated to specify the core/thread filtering.
+This option is provided for backward compatiblility and only support 8bit
+which may not cover all the core/thread sharing L3C.
+
2. Tracetag allow the user to chose to count only read, write or atomic
operations via the tt_req parameeter in perf. The default value counts all
operations. tt_req is 3bits, 3'b100 represents read operations, 3'b101
@@ -107,8 +114,52 @@ uring channel. It is 2 bits. Some important codes are as follows:
- 2'b11: count the events which sent to the uring_ext (MATA) channel;
- 2'b01: is the same as 2'b11;
- 2'b10: count the events which sent to the uring (non-MATA) channel;
-- 2'b00: default value, count the events which sent to the both uring and
- uring_ext channel;
+- 2'b00: default value, count the events which sent to both uring and
+ uring_ext channels;
+
+6. ch: NoC PMU supports filtering the event counts of certain transaction
+channel with this option. The current supported channels are as follows:
+
+- 3'b010: Request channel
+- 3'b100: Snoop channel
+- 3'b110: Response channel
+- 3'b111: Data channel
+
+7. tt_en: NoC PMU supports counting only transactions that have tracetag set
+if this option is set. See the 2nd list for more information about tracetag.
+
+For HiSilicon uncore PMU v3 whose identifier is 0x40, some uncore PMUs are
+further divided into parts for finer granularity of tracing, each part has its
+own dedicated PMU, and all such PMUs together cover the monitoring job of events
+on particular uncore device. Such PMUs are described in sysfs with name format
+slightly changed::
+
+/sys/bus/event_source/devices/hisi_sccl{X}_<l3c{Y}_{Z}/ddrc{Y}_{Z}/noc{Y}_{Z}>
+
+Z is the sub-id, indicating different PMUs for part of hardware device.
+
+Usage of most PMUs with different sub-ids are identical. Specially, L3C PMU
+provides ``ext`` option to allow exploration of even finer granual statistics
+of L3C PMU. L3C PMU driver uses that as hint of termination when delivering
+perf command to hardware:
+
+- ext=0: Default, could be used with event names.
+- ext=1 and ext=2: Must be used with event codes, event names are not supported.
+
+An example of perf command could be::
+
+ $# perf stat -a -e hisi_sccl0_l3c1_0/rd_spipe/ sleep 5
+
+or::
+
+ $# perf stat -a -e hisi_sccl0_l3c1_0/event=0x1,ext=1/ sleep 5
+
+As above, ``hisi_sccl0_l3c1_0`` locates PMU of Super CPU CLuster 0, L3 cache 1
+pipe0.
+
+First command locates the first part of L3C since ``ext=0`` is implied by
+default. Second command issues the counting on another part of L3C with the
+event ``0x1``.
Users could configure IDs to count data come from specific CCL/ICL, by setting
srcid_cmd & srcid_msk, and data desitined for specific CCL/ICL by setting
diff --git a/Documentation/admin-guide/perf/hns3-pmu.rst b/Documentation/admin-guide/perf/hns3-pmu.rst
index 75a40846d47f..1195e570f2d6 100644
--- a/Documentation/admin-guide/perf/hns3-pmu.rst
+++ b/Documentation/admin-guide/perf/hns3-pmu.rst
@@ -16,7 +16,7 @@ HNS3 PMU driver
The HNS3 PMU driver registers a perf PMU with the name of its sicl id.::
- /sys/devices/hns3_pmu_sicl_<sicl_id>
+ /sys/bus/event_source/devices/hns3_pmu_sicl_<sicl_id>
PMU driver provides description of available events, filter modes, format,
identifier and cpumask in sysfs.
@@ -40,9 +40,9 @@ device.
Example usage of checking event code and subevent code::
- $# cat /sys/devices/hns3_pmu_sicl_0/events/dly_tx_normal_to_mac_time
+ $# cat /sys/bus/event_source/devices/hns3_pmu_sicl_0/events/dly_tx_normal_to_mac_time
config=0x00204
- $# cat /sys/devices/hns3_pmu_sicl_0/events/dly_tx_normal_to_mac_packet_num
+ $# cat /sys/bus/event_source/devices/hns3_pmu_sicl_0/events/dly_tx_normal_to_mac_packet_num
config=0x10204
Each performance statistic has a pair of events to get two values to
@@ -60,7 +60,7 @@ computation to calculate real performance data is:::
Example usage of checking supported filter mode::
- $# cat /sys/devices/hns3_pmu_sicl_0/filtermode/bw_ssu_rpu_byte_num
+ $# cat /sys/bus/event_source/devices/hns3_pmu_sicl_0/filtermode/bw_ssu_rpu_byte_num
filter mode supported: global/port/port-tc/func/func-queue/
Example usage of perf::
diff --git a/Documentation/admin-guide/perf/index.rst b/Documentation/admin-guide/perf/index.rst
index f4a4513c526f..47d9a3df6329 100644
--- a/Documentation/admin-guide/perf/index.rst
+++ b/Documentation/admin-guide/perf/index.rst
@@ -13,8 +13,12 @@ Performance monitor support
imx-ddr
qcom_l2_pmu
qcom_l3_pmu
+ starfive_starlink_pmu
+ mrvl-odyssey-ddr-pmu
+ mrvl-odyssey-tad-pmu
arm-ccn
arm-cmn
+ arm-ni
xgene-pmu
arm_dsu_pmu
thunderx2-pmu
@@ -24,3 +28,5 @@ Performance monitor support
meson-ddr-pmu
cxl
ampere_cspmu
+ mrvl-pem-pmu
+ fujitsu_uncore_pmu
diff --git a/Documentation/admin-guide/perf/mrvl-odyssey-ddr-pmu.rst b/Documentation/admin-guide/perf/mrvl-odyssey-ddr-pmu.rst
new file mode 100644
index 000000000000..2e817593a4d9
--- /dev/null
+++ b/Documentation/admin-guide/perf/mrvl-odyssey-ddr-pmu.rst
@@ -0,0 +1,80 @@
+===================================================================
+Marvell Odyssey DDR PMU Performance Monitoring Unit (PMU UNCORE)
+===================================================================
+
+Odyssey DRAM Subsystem supports eight counters for monitoring performance
+and software can program those counters to monitor any of the defined
+performance events. Supported performance events include those counted
+at the interface between the DDR controller and the PHY, interface between
+the DDR Controller and the CHI interconnect, or within the DDR Controller.
+
+Additionally DSS also supports two fixed performance event counters, one
+for ddr reads and the other for ddr writes.
+
+The counter will be operating in either manual or auto mode.
+
+The PMU driver exposes the available events and format options under sysfs::
+
+ /sys/bus/event_source/devices/mrvl_ddr_pmu_<>/events/
+ /sys/bus/event_source/devices/mrvl_ddr_pmu_<>/format/
+
+Examples::
+
+ $ perf list | grep ddr
+ mrvl_ddr_pmu_<>/ddr_act_bypass_access/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_bsm_alloc/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_bsm_starvation/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_cam_active_access/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_cam_mwr/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_cam_rd_active_access/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_cam_rd_or_wr_access/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_cam_read/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_cam_wr_access/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_cam_write/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_capar_error/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_crit_ref/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_ddr_reads/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_ddr_writes/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_dfi_cmd_is_retry/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_dfi_cycles/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_dfi_parity_poison/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_dfi_rd_data_access/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_dfi_wr_data_access/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_dqsosc_mpc/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_dqsosc_mrr/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_enter_mpsm/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_enter_powerdown/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_enter_selfref/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_hif_pri_rdaccess/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_hif_rd_access/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_hif_rd_or_wr_access/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_hif_rmw_access/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_hif_wr_access/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_hpri_sched_rd_crit_access/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_load_mode/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_lpri_sched_rd_crit_access/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_precharge/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_precharge_for_other/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_precharge_for_rdwr/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_raw_hazard/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_rd_bypass_access/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_rd_crc_error/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_rd_uc_ecc_error/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_rdwr_transitions/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_refresh/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_retry_fifo_full/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_spec_ref/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_tcr_mrr/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_war_hazard/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_waw_hazard/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_win_limit_reached_rd/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_win_limit_reached_wr/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_wr_crc_error/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_wr_trxn_crit_access/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_write_combine/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_zqcl/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_zqlatch/ [Kernel PMU event]
+ mrvl_ddr_pmu_<>/ddr_zqstart/ [Kernel PMU event]
+
+ $ perf stat -e ddr_cam_read,ddr_cam_write,ddr_cam_active_access,ddr_cam
+ rd_or_wr_access,ddr_cam_rd_active_access,ddr_cam_mwr <workload>
diff --git a/Documentation/admin-guide/perf/mrvl-odyssey-tad-pmu.rst b/Documentation/admin-guide/perf/mrvl-odyssey-tad-pmu.rst
new file mode 100644
index 000000000000..ad1975b14087
--- /dev/null
+++ b/Documentation/admin-guide/perf/mrvl-odyssey-tad-pmu.rst
@@ -0,0 +1,37 @@
+====================================================================
+Marvell Odyssey LLC-TAD Performance Monitoring Unit (PMU UNCORE)
+====================================================================
+
+Each TAD provides eight 64-bit counters for monitoring
+cache behavior.The driver always configures the same counter for
+all the TADs. The user would end up effectively reserving one of
+eight counters in every TAD to look across all TADs.
+The occurrences of events are aggregated and presented to the user
+at the end of running the workload. The driver does not provide a
+way for the user to partition TADs so that different TADs are used for
+different applications.
+
+The performance events reflect various internal or interface activities.
+By combining the values from multiple performance counters, cache
+performance can be measured in terms such as: cache miss rate, cache
+allocations, interface retry rate, internal resource occupancy, etc.
+
+The PMU driver exposes the available events and format options under sysfs::
+
+ /sys/bus/event_source/devices/tad/events/
+ /sys/bus/event_source/devices/tad/format/
+
+Examples::
+
+ $ perf list | grep tad
+ tad/tad_alloc_any/ [Kernel PMU event]
+ tad/tad_alloc_dtg/ [Kernel PMU event]
+ tad/tad_alloc_ltg/ [Kernel PMU event]
+ tad/tad_hit_any/ [Kernel PMU event]
+ tad/tad_hit_dtg/ [Kernel PMU event]
+ tad/tad_hit_ltg/ [Kernel PMU event]
+ tad/tad_req_msh_in_exlmn/ [Kernel PMU event]
+ tad/tad_tag_rd/ [Kernel PMU event]
+ tad/tad_tot_cycle/ [Kernel PMU event]
+
+ $ perf stat -e tad_alloc_dtg,tad_alloc_ltg,tad_alloc_any,tad_hit_dtg,tad_hit_ltg,tad_hit_any,tad_tag_rd <workload>
diff --git a/Documentation/admin-guide/perf/mrvl-pem-pmu.rst b/Documentation/admin-guide/perf/mrvl-pem-pmu.rst
new file mode 100644
index 000000000000..c39007149b97
--- /dev/null
+++ b/Documentation/admin-guide/perf/mrvl-pem-pmu.rst
@@ -0,0 +1,56 @@
+=================================================================
+Marvell Odyssey PEM Performance Monitoring Unit (PMU UNCORE)
+=================================================================
+
+The PCI Express Interface Units(PEM) are associated with a corresponding
+monitoring unit. This includes performance counters to track various
+characteristics of the data that is transmitted over the PCIe link.
+
+The counters track inbound and outbound transactions which
+includes separate counters for posted/non-posted/completion TLPs.
+Also, inbound and outbound memory read requests along with their
+latencies can also be monitored. Address Translation Services(ATS)events
+such as ATS Translation, ATS Page Request, ATS Invalidation along with
+their corresponding latencies are also tracked.
+
+There are separate 64 bit counters to measure posted/non-posted/completion
+tlps in inbound and outbound transactions. ATS events are measured by
+different counters.
+
+The PMU driver exposes the available events and format options under sysfs,
+/sys/bus/event_source/devices/mrvl_pcie_rc_pmu_<>/events/
+/sys/bus/event_source/devices/mrvl_pcie_rc_pmu_<>/format/
+
+Examples::
+
+ # perf list | grep mrvl_pcie_rc_pmu
+ mrvl_pcie_rc_pmu_<>/ats_inv/ [Kernel PMU event]
+ mrvl_pcie_rc_pmu_<>/ats_inv_latency/ [Kernel PMU event]
+ mrvl_pcie_rc_pmu_<>/ats_pri/ [Kernel PMU event]
+ mrvl_pcie_rc_pmu_<>/ats_pri_latency/ [Kernel PMU event]
+ mrvl_pcie_rc_pmu_<>/ats_trans/ [Kernel PMU event]
+ mrvl_pcie_rc_pmu_<>/ats_trans_latency/ [Kernel PMU event]
+ mrvl_pcie_rc_pmu_<>/ib_inflight/ [Kernel PMU event]
+ mrvl_pcie_rc_pmu_<>/ib_reads/ [Kernel PMU event]
+ mrvl_pcie_rc_pmu_<>/ib_req_no_ro_ebus/ [Kernel PMU event]
+ mrvl_pcie_rc_pmu_<>/ib_req_no_ro_ncb/ [Kernel PMU event]
+ mrvl_pcie_rc_pmu_<>/ib_tlp_cpl_partid/ [Kernel PMU event]
+ mrvl_pcie_rc_pmu_<>/ib_tlp_dwords_cpl_partid/ [Kernel PMU event]
+ mrvl_pcie_rc_pmu_<>/ib_tlp_dwords_npr/ [Kernel PMU event]
+ mrvl_pcie_rc_pmu_<>/ib_tlp_dwords_pr/ [Kernel PMU event]
+ mrvl_pcie_rc_pmu_<>/ib_tlp_npr/ [Kernel PMU event]
+ mrvl_pcie_rc_pmu_<>/ib_tlp_pr/ [Kernel PMU event]
+ mrvl_pcie_rc_pmu_<>/ob_inflight_partid/ [Kernel PMU event]
+ mrvl_pcie_rc_pmu_<>/ob_merges_cpl_partid/ [Kernel PMU event]
+ mrvl_pcie_rc_pmu_<>/ob_merges_npr_partid/ [Kernel PMU event]
+ mrvl_pcie_rc_pmu_<>/ob_merges_pr_partid/ [Kernel PMU event]
+ mrvl_pcie_rc_pmu_<>/ob_reads_partid/ [Kernel PMU event]
+ mrvl_pcie_rc_pmu_<>/ob_tlp_cpl_partid/ [Kernel PMU event]
+ mrvl_pcie_rc_pmu_<>/ob_tlp_dwords_cpl_partid/ [Kernel PMU event]
+ mrvl_pcie_rc_pmu_<>/ob_tlp_dwords_npr_partid/ [Kernel PMU event]
+ mrvl_pcie_rc_pmu_<>/ob_tlp_dwords_pr_partid/ [Kernel PMU event]
+ mrvl_pcie_rc_pmu_<>/ob_tlp_npr_partid/ [Kernel PMU event]
+ mrvl_pcie_rc_pmu_<>/ob_tlp_pr_partid/ [Kernel PMU event]
+
+
+ # perf stat -e ib_inflight,ib_reads,ib_req_no_ro_ebus,ib_req_no_ro_ncb <workload>
diff --git a/Documentation/admin-guide/perf/nvidia-pmu.rst b/Documentation/admin-guide/perf/nvidia-pmu.rst
index 2e0d47cfe7ea..f538ef67e0e8 100644
--- a/Documentation/admin-guide/perf/nvidia-pmu.rst
+++ b/Documentation/admin-guide/perf/nvidia-pmu.rst
@@ -34,7 +34,7 @@ strongly-ordered (SO) PCIE write traffic to local/remote memory. Please see
traffic coverage.
The events and configuration options of this PMU device are described in sysfs,
-see /sys/bus/event_sources/devices/nvidia_scf_pmu_<socket-id>.
+see /sys/bus/event_source/devices/nvidia_scf_pmu_<socket-id>.
Example usage:
@@ -66,7 +66,7 @@ Please see :ref:`NVIDIA_Uncore_PMU_Traffic_Coverage_Section` for more info about
the PMU traffic coverage.
The events and configuration options of this PMU device are described in sysfs,
-see /sys/bus/event_sources/devices/nvidia_nvlink_c2c0_pmu_<socket-id>.
+see /sys/bus/event_source/devices/nvidia_nvlink_c2c0_pmu_<socket-id>.
Example usage:
@@ -86,6 +86,22 @@ Example usage:
perf stat -a -e nvidia_nvlink_c2c0_pmu_3/event=0x0/
+The NVLink-C2C has two ports that can be connected to one GPU (occupying both
+ports) or to two GPUs (one GPU per port). The user can use "port" bitmap
+parameter to select the port(s) to monitor. Each bit represents the port number,
+e.g. "port=0x1" corresponds to port 0 and "port=0x3" is for port 0 and 1. The
+PMU will monitor both ports by default if not specified.
+
+Example for port filtering:
+
+* Count event id 0x0 from the GPU connected with socket 0 on port 0::
+
+ perf stat -a -e nvidia_nvlink_c2c0_pmu_0/event=0x0,port=0x1/
+
+* Count event id 0x0 from the GPUs connected with socket 0 on port 0 and port 1::
+
+ perf stat -a -e nvidia_nvlink_c2c0_pmu_0/event=0x0,port=0x3/
+
NVLink-C2C1 PMU
-------------------
@@ -96,7 +112,7 @@ Please see :ref:`NVIDIA_Uncore_PMU_Traffic_Coverage_Section` for more info about
the PMU traffic coverage.
The events and configuration options of this PMU device are described in sysfs,
-see /sys/bus/event_sources/devices/nvidia_nvlink_c2c1_pmu_<socket-id>.
+see /sys/bus/event_source/devices/nvidia_nvlink_c2c1_pmu_<socket-id>.
Example usage:
@@ -116,6 +132,22 @@ Example usage:
perf stat -a -e nvidia_nvlink_c2c1_pmu_3/event=0x0/
+The NVLink-C2C has two ports that can be connected to one GPU (occupying both
+ports) or to two GPUs (one GPU per port). The user can use "port" bitmap
+parameter to select the port(s) to monitor. Each bit represents the port number,
+e.g. "port=0x1" corresponds to port 0 and "port=0x3" is for port 0 and 1. The
+PMU will monitor both ports by default if not specified.
+
+Example for port filtering:
+
+* Count event id 0x0 from the GPU connected with socket 0 on port 0::
+
+ perf stat -a -e nvidia_nvlink_c2c1_pmu_0/event=0x0,port=0x1/
+
+* Count event id 0x0 from the GPUs connected with socket 0 on port 0 and port 1::
+
+ perf stat -a -e nvidia_nvlink_c2c1_pmu_0/event=0x0,port=0x3/
+
CNVLink PMU
---------------
@@ -125,13 +157,14 @@ to local memory. For PCIE traffic, this PMU captures read and relaxed ordered
for more info about the PMU traffic coverage.
The events and configuration options of this PMU device are described in sysfs,
-see /sys/bus/event_sources/devices/nvidia_cnvlink_pmu_<socket-id>.
+see /sys/bus/event_source/devices/nvidia_cnvlink_pmu_<socket-id>.
Each SoC socket can be connected to one or more sockets via CNVLink. The user can
use "rem_socket" bitmap parameter to select the remote socket(s) to monitor.
Each bit represents the socket number, e.g. "rem_socket=0xE" corresponds to
-socket 1 to 3.
-/sys/bus/event_sources/devices/nvidia_cnvlink_pmu_<socket-id>/format/rem_socket
+socket 1 to 3. The PMU will monitor all remote sockets by default if not
+specified.
+/sys/bus/event_source/devices/nvidia_cnvlink_pmu_<socket-id>/format/rem_socket
shows the valid bits that can be set in the "rem_socket" parameter.
The PMU can not distinguish the remote traffic initiator, therefore it does not
@@ -165,12 +198,13 @@ local/remote memory. Please see :ref:`NVIDIA_Uncore_PMU_Traffic_Coverage_Section
for more info about the PMU traffic coverage.
The events and configuration options of this PMU device are described in sysfs,
-see /sys/bus/event_sources/devices/nvidia_pcie_pmu_<socket-id>.
+see /sys/bus/event_source/devices/nvidia_pcie_pmu_<socket-id>.
Each SoC socket can support multiple root ports. The user can use
"root_port" bitmap parameter to select the port(s) to monitor, i.e.
-"root_port=0xF" corresponds to root port 0 to 3.
-/sys/bus/event_sources/devices/nvidia_pcie_pmu_<socket-id>/format/root_port
+"root_port=0xF" corresponds to root port 0 to 3. The PMU will monitor all root
+ports by default if not specified.
+/sys/bus/event_source/devices/nvidia_pcie_pmu_<socket-id>/format/root_port
shows the valid bits that can be set in the "root_port" parameter.
Example usage:
diff --git a/Documentation/admin-guide/perf/qcom_l2_pmu.rst b/Documentation/admin-guide/perf/qcom_l2_pmu.rst
index c130178a4a55..c37c6be9b8d8 100644
--- a/Documentation/admin-guide/perf/qcom_l2_pmu.rst
+++ b/Documentation/admin-guide/perf/qcom_l2_pmu.rst
@@ -10,7 +10,7 @@ There is one logical L2 PMU exposed, which aggregates the results from
the physical PMUs.
The driver provides a description of its available events and configuration
-options in sysfs, see /sys/devices/l2cache_0.
+options in sysfs, see /sys/bus/event_source/devices/l2cache_0.
The "format" directory describes the format of the events.
diff --git a/Documentation/admin-guide/perf/qcom_l3_pmu.rst b/Documentation/admin-guide/perf/qcom_l3_pmu.rst
index a3d014a46bfd..a66556b7e985 100644
--- a/Documentation/admin-guide/perf/qcom_l3_pmu.rst
+++ b/Documentation/admin-guide/perf/qcom_l3_pmu.rst
@@ -9,7 +9,7 @@ PMU with device name l3cache_<socket>_<instance>. User space is responsible
for aggregating across slices.
The driver provides a description of its available events and configuration
-options in sysfs, see /sys/devices/l3cache*. Given that these are uncore PMUs
+options in sysfs, see /sys/bus/event_source/devices/l3cache*. Given that these are uncore PMUs
the driver also exposes a "cpumask" sysfs attribute which contains a mask
consisting of one CPU per socket which will be used to handle all the PMU
events on that socket.
diff --git a/Documentation/admin-guide/perf/starfive_starlink_pmu.rst b/Documentation/admin-guide/perf/starfive_starlink_pmu.rst
new file mode 100644
index 000000000000..2932ddb4eb76
--- /dev/null
+++ b/Documentation/admin-guide/perf/starfive_starlink_pmu.rst
@@ -0,0 +1,46 @@
+================================================
+StarFive StarLink Performance Monitor Unit (PMU)
+================================================
+
+StarFive StarLink Performance Monitor Unit (PMU) exists within the
+StarLink Coherent Network on Chip (CNoC) that connects multiple CPU
+clusters with an L3 memory system.
+
+The uncore PMU supports overflow interrupt, up to 16 programmable 64bit
+event counters, and an independent 64bit cycle counter.
+The PMU can only be accessed via Memory Mapped I/O and are common to the
+cores connected to the same PMU.
+
+Driver exposes supported PMU events in sysfs "events" directory under::
+
+ /sys/bus/event_source/devices/starfive_starlink_pmu/events/
+
+Driver exposes cpu used to handle PMU events in sysfs "cpumask" directory
+under::
+
+ /sys/bus/event_source/devices/starfive_starlink_pmu/cpumask/
+
+Driver describes the format of config (event ID) in sysfs "format" directory
+under::
+
+ /sys/bus/event_source/devices/starfive_starlink_pmu/format/
+
+Example of perf usage::
+
+ $ perf list
+
+ starfive_starlink_pmu/cycles/ [Kernel PMU event]
+ starfive_starlink_pmu/read_hit/ [Kernel PMU event]
+ starfive_starlink_pmu/read_miss/ [Kernel PMU event]
+ starfive_starlink_pmu/read_request/ [Kernel PMU event]
+ starfive_starlink_pmu/release_request/ [Kernel PMU event]
+ starfive_starlink_pmu/write_hit/ [Kernel PMU event]
+ starfive_starlink_pmu/write_miss/ [Kernel PMU event]
+ starfive_starlink_pmu/write_request/ [Kernel PMU event]
+ starfive_starlink_pmu/writeback/ [Kernel PMU event]
+
+
+ $ perf stat -a -e /starfive_starlink_pmu/cycles/ sleep 1
+
+Sampling is not supported. As a result, "perf record" is not supported.
+Attaching to a task is not supported, only system-wide counting is supported.
diff --git a/Documentation/admin-guide/perf/thunderx2-pmu.rst b/Documentation/admin-guide/perf/thunderx2-pmu.rst
index 01f158238ae1..9255f7bf9452 100644
--- a/Documentation/admin-guide/perf/thunderx2-pmu.rst
+++ b/Documentation/admin-guide/perf/thunderx2-pmu.rst
@@ -22,7 +22,7 @@ The thunderx2_pmu driver registers per-socket perf PMUs for the DMC and
L3C devices. Each PMU can be used to count up to 4 (DMC/L3C) or up to 8
(CCPI2) events simultaneously. The PMUs provide a description of their
available events and configuration options under sysfs, see
-/sys/devices/uncore_<l3c_S/dmc_S/ccpi2_S/>; S is the socket id.
+/sys/bus/event_source/devices/uncore_<l3c_S/dmc_S/ccpi2_S/>; S is the socket id.
The driver does not support sampling, therefore "perf record" will not
work. Per-task perf sessions are also not supported.
diff --git a/Documentation/admin-guide/perf/xgene-pmu.rst b/Documentation/admin-guide/perf/xgene-pmu.rst
index 644f8ed89152..98ccb8e777c4 100644
--- a/Documentation/admin-guide/perf/xgene-pmu.rst
+++ b/Documentation/admin-guide/perf/xgene-pmu.rst
@@ -13,7 +13,7 @@ PMU (perf) driver
The xgene-pmu driver registers several perf PMU drivers. Each of the perf
driver provides description of its available events and configuration options
-in sysfs, see /sys/devices/<l3cX/iobX/mcbX/mcX>/.
+in sysfs, see /sys/bus/event_source/devices/<l3cX/iobX/mcbX/mcX>/.
The "format" directory describes format of the config (event ID),
config1 (agent ID) fields of the perf_event_attr structure. The "events"
diff --git a/Documentation/admin-guide/pm/amd-pstate.rst b/Documentation/admin-guide/pm/amd-pstate.rst
index 9eb26014d34b..e1771f2225d5 100644
--- a/Documentation/admin-guide/pm/amd-pstate.rst
+++ b/Documentation/admin-guide/pm/amd-pstate.rst
@@ -72,7 +72,7 @@ to manage each performance update behavior. ::
Lowest non- | | | |
linear perf ------>+-----------------------+ +-----------------------+
| | | |
- | | Lowest perf ---->| |
+ | | Min perf ---->| |
| | | |
Lowest perf ------>+-----------------------+ +-----------------------+
| | | |
@@ -262,6 +262,17 @@ lowest non-linear performance in `AMD CPPC Performance Capability
<perf_cap_>`_.)
This attribute is read-only.
+``amd_pstate_hw_prefcore``
+
+Whether the platform supports the preferred core feature and it has been
+enabled. This attribute is read-only.
+
+``amd_pstate_prefcore_ranking``
+
+The performance ranking of the core. This number doesn't have any unit, but
+larger numbers are preferred at the time of reading. This can change at
+runtime based on platform conditions. This attribute is read-only.
+
``energy_performance_available_preferences``
A list of all the supported EPP preferences that could be used for
@@ -281,6 +292,22 @@ integer values defined between 0 to 255 when EPP feature is enabled by platform
firmware, if EPP feature is disabled, driver will ignore the written value
This attribute is read-write.
+``boost``
+The `boost` sysfs attribute provides control over the CPU core
+performance boost, allowing users to manage the maximum frequency limitation
+of the CPU. This attribute can be used to enable or disable the boost feature
+on individual CPUs.
+
+When the boost feature is enabled, the CPU can dynamically increase its frequency
+beyond the base frequency, providing enhanced performance for demanding workloads.
+On the other hand, disabling the boost feature restricts the CPU to operate at the
+base frequency, which may be desirable in certain scenarios to prioritize power
+efficiency or manage temperature.
+
+To manipulate the `boost` attribute, users can write a value of `0` to disable the
+boost or `1` to enable it, for the respective CPU using the sysfs path
+`/sys/devices/system/cpu/cpuX/cpufreq/boost`, where `X` represents the CPU number.
+
Other performance and frequency values can be read back from
``/sys/devices/system/cpu/cpuX/acpi_cppc/``, see :ref:`cppc_sysfs`.
@@ -300,8 +327,8 @@ platforms. The AMD P-States mechanism is the more performance and energy
efficiency frequency management method on AMD processors.
-AMD Pstate Driver Operation Modes
-=================================
+``amd-pstate`` Driver Operation Modes
+======================================
``amd_pstate`` CPPC has 3 operation modes: autonomous (active) mode,
non-autonomous (passive) mode and guided autonomous (guided) mode.
@@ -353,6 +380,48 @@ is activated. In this mode, driver requests minimum and maximum performance
level and the platform autonomously selects a performance level in this range
and appropriate to the current workload.
+``amd-pstate`` Preferred Core
+=================================
+
+The core frequency is subjected to the process variation in semiconductors.
+Not all cores are able to reach the maximum frequency respecting the
+infrastructure limits. Consequently, AMD has redefined the concept of
+maximum frequency of a part. This means that a fraction of cores can reach
+maximum frequency. To find the best process scheduling policy for a given
+scenario, OS needs to know the core ordering informed by the platform through
+highest performance capability register of the CPPC interface.
+
+``amd-pstate`` preferred core enables the scheduler to prefer scheduling on
+cores that can achieve a higher frequency with lower voltage. The preferred
+core rankings can dynamically change based on the workload, platform conditions,
+thermals and ageing.
+
+The priority metric will be initialized by the ``amd-pstate`` driver. The ``amd-pstate``
+driver will also determine whether or not ``amd-pstate`` preferred core is
+supported by the platform.
+
+``amd-pstate`` driver will provide an initial core ordering when the system boots.
+The platform uses the CPPC interfaces to communicate the core ranking to the
+operating system and scheduler to make sure that OS is choosing the cores
+with highest performance firstly for scheduling the process. When ``amd-pstate``
+driver receives a message with the highest performance change, it will
+update the core ranking and set the cpu's priority.
+
+``amd-pstate`` Preferred Core Switch
+=====================================
+Kernel Parameters
+-----------------
+
+``amd-pstate`` peferred core`` has two states: enable and disable.
+Enable/disable states can be chosen by different kernel parameters.
+Default enable ``amd-pstate`` preferred core.
+
+``amd_prefcore=disable``
+
+For systems that support ``amd-pstate`` preferred core, the core rankings will
+always be advertised by the platform. But OS can choose to ignore that via the
+kernel parameter ``amd_prefcore=disable``.
+
User Space Interface in ``sysfs`` - General
===========================================
@@ -364,7 +433,7 @@ control its functionality at the system level. They are located in the
``/sys/devices/system/cpu/amd_pstate/`` directory and affect all CPUs.
``status``
- Operation mode of the driver: "active", "passive" or "disable".
+ Operation mode of the driver: "active", "passive", "guided" or "disable".
"active"
The driver is functional and in the ``active mode``
@@ -385,6 +454,19 @@ control its functionality at the system level. They are located in the
to the operation mode represented by that string - or to be
unregistered in the "disable" case.
+``prefcore``
+ Preferred core state of the driver: "enabled" or "disabled".
+
+ "enabled"
+ Enable the ``amd-pstate`` preferred core.
+
+ "disabled"
+ Disable the ``amd-pstate`` preferred core
+
+
+ This attribute is read-only to check the state of preferred core set
+ by the kernel parameter.
+
``cpupower`` tool support for ``amd-pstate``
===============================================
diff --git a/Documentation/admin-guide/pm/cpufreq.rst b/Documentation/admin-guide/pm/cpufreq.rst
index 6adb7988e0eb..738d7b4dc33a 100644
--- a/Documentation/admin-guide/pm/cpufreq.rst
+++ b/Documentation/admin-guide/pm/cpufreq.rst
@@ -231,7 +231,7 @@ are the following:
present).
The existence of the limit may be a result of some (often unintentional)
- BIOS settings, restrictions coming from a service processor or another
+ BIOS settings, restrictions coming from a service processor or other
BIOS/HW-based mechanisms.
This does not cover ACPI thermal limitations which can be discovered
@@ -248,6 +248,20 @@ are the following:
If that frequency cannot be determined, this attribute should not
be present.
+``cpuinfo_avg_freq``
+ An average frequency (in KHz) of all CPUs belonging to a given policy,
+ derived from a hardware provided feedback and reported on a time frame
+ spanning at most few milliseconds.
+
+ This is expected to be based on the frequency the hardware actually runs
+ at and, as such, might require specialised hardware support (such as AMU
+ extension on ARM). If one cannot be determined, this attribute should
+ not be present.
+
+ Note that failed attempt to retrieve current frequency for a given
+ CPU(s) will result in an appropriate error, i.e.: EAGAIN for CPU that
+ remains idle (raised on ARM).
+
``cpuinfo_max_freq``
Maximum possible operating frequency the CPUs belonging to this policy
can run at (in kHz).
@@ -260,13 +274,13 @@ are the following:
The time it takes to switch the CPUs belonging to this policy from one
P-state to another, in nanoseconds.
- If unknown or if known to be so high that the scaling driver does not
- work with the `ondemand`_ governor, -1 (:c:macro:`CPUFREQ_ETERNAL`)
- will be returned by reads from this attribute.
-
``related_cpus``
List of all (online and offline) CPUs belonging to this policy.
+``scaling_available_frequencies``
+ List of available frequencies of the CPUs belonging to this policy
+ (in kHz).
+
``scaling_available_governors``
List of ``CPUFreq`` scaling governors present in the kernel that can
be attached to this policy or (if the |intel_pstate| scaling driver is
@@ -289,7 +303,8 @@ are the following:
Some architectures (e.g. ``x86``) may attempt to provide information
more precisely reflecting the current CPU frequency through this
attribute, but that still may not be the exact current CPU frequency as
- seen by the hardware at the moment.
+ seen by the hardware at the moment. This behavior though, is only
+ available via c:macro:``CPUFREQ_ARCH_CUR_FREQ`` option.
``scaling_driver``
The scaling driver currently in use.
@@ -379,7 +394,9 @@ policy limits change after that.
This governor does not do anything by itself. Instead, it allows user space
to set the CPU frequency for the policy it is attached to by writing to the
-``scaling_setspeed`` attribute of that policy.
+``scaling_setspeed`` attribute of that policy. Though the intention may be to
+set an exact frequency for the policy, the actual frequency may vary depending
+on hardware coordination, thermal and power limits, and other factors.
``schedutil``
-------------
@@ -421,8 +438,8 @@ This governor exposes only one tunable:
``rate_limit_us``
Minimum time (in microseconds) that has to pass between two consecutive
- runs of governor computations (default: 1000 times the scaling driver's
- transition latency).
+ runs of governor computations (default: 1.5 times the scaling driver's
+ transition latency or the maximum 2ms).
The purpose of this tunable is to reduce the scheduler context overhead
of the governor which might be excessive without it.
@@ -470,17 +487,17 @@ This governor exposes the following tunables:
This is how often the governor's worker routine should run, in
microseconds.
- Typically, it is set to values of the order of 10000 (10 ms). Its
- default value is equal to the value of ``cpuinfo_transition_latency``
- for each policy this governor is attached to (but since the unit here
- is greater by 1000, this means that the time represented by
- ``sampling_rate`` is 1000 times greater than the transition latency by
- default).
+ Typically, it is set to values of the order of 2000 (2 ms). Its
+ default value is to add a 50% breathing room
+ to ``cpuinfo_transition_latency`` on each policy this governor is
+ attached to. The minimum is typically the length of two scheduler
+ ticks.
If this tunable is per-policy, the following shell command sets the time
- represented by it to be 750 times as high as the transition latency::
+ represented by it to be 1.5 times as high as the transition latency
+ (the default)::
- # echo `$(($(cat cpuinfo_transition_latency) * 750 / 1000)) > ondemand/sampling_rate
+ # echo `$(($(cat cpuinfo_transition_latency) * 3 / 2))` > ondemand/sampling_rate
``up_threshold``
If the estimated CPU load is above this value (in percent), the governor
diff --git a/Documentation/admin-guide/pm/cpuidle.rst b/Documentation/admin-guide/pm/cpuidle.rst
index 19754beb5a4e..be4c1120e3f0 100644
--- a/Documentation/admin-guide/pm/cpuidle.rst
+++ b/Documentation/admin-guide/pm/cpuidle.rst
@@ -269,61 +269,56 @@ Namely, when invoked to select an idle state for a CPU (i.e. an idle state that
the CPU will ask the processor hardware to enter), it attempts to predict the
idle duration and uses the predicted value for idle state selection.
-It first obtains the time until the closest timer event with the assumption
-that the scheduler tick will be stopped. That time, referred to as the *sleep
-length* in what follows, is the upper bound on the time before the next CPU
-wakeup. It is used to determine the sleep length range, which in turn is needed
-to get the sleep length correction factor.
-
-The ``menu`` governor maintains two arrays of sleep length correction factors.
-One of them is used when tasks previously running on the given CPU are waiting
-for some I/O operations to complete and the other one is used when that is not
-the case. Each array contains several correction factor values that correspond
-to different sleep length ranges organized so that each range represented in the
-array is approximately 10 times wider than the previous one.
-
-The correction factor for the given sleep length range (determined before
-selecting the idle state for the CPU) is updated after the CPU has been woken
-up and the closer the sleep length is to the observed idle duration, the closer
-to 1 the correction factor becomes (it must fall between 0 and 1 inclusive).
-The sleep length is multiplied by the correction factor for the range that it
-falls into to obtain the first approximation of the predicted idle duration.
-
-Next, the governor uses a simple pattern recognition algorithm to refine its
+It first uses a simple pattern recognition algorithm to obtain a preliminary
idle duration prediction. Namely, it saves the last 8 observed idle duration
values and, when predicting the idle duration next time, it computes the average
and variance of them. If the variance is small (smaller than 400 square
milliseconds) or it is small relative to the average (the average is greater
that 6 times the standard deviation), the average is regarded as the "typical
-interval" value. Otherwise, the longest of the saved observed idle duration
+interval" value. Otherwise, either the longest or the shortest (depending on
+which one is farther from the average) of the saved observed idle duration
values is discarded and the computation is repeated for the remaining ones.
+
Again, if the variance of them is small (in the above sense), the average is
taken as the "typical interval" value and so on, until either the "typical
-interval" is determined or too many data points are disregarded, in which case
-the "typical interval" is assumed to equal "infinity" (the maximum unsigned
-integer value). The "typical interval" computed this way is compared with the
-sleep length multiplied by the correction factor and the minimum of the two is
-taken as the predicted idle duration.
-
-Then, the governor computes an extra latency limit to help "interactive"
-workloads. It uses the observation that if the exit latency of the selected
-idle state is comparable with the predicted idle duration, the total time spent
-in that state probably will be very short and the amount of energy to save by
-entering it will be relatively small, so likely it is better to avoid the
-overhead related to entering that state and exiting it. Thus selecting a
-shallower state is likely to be a better option then. The first approximation
-of the extra latency limit is the predicted idle duration itself which
-additionally is divided by a value depending on the number of tasks that
-previously ran on the given CPU and now they are waiting for I/O operations to
-complete. The result of that division is compared with the latency limit coming
-from the power management quality of service, or `PM QoS <cpu-pm-qos_>`_,
-framework and the minimum of the two is taken as the limit for the idle states'
-exit latency.
+interval" is determined or too many data points are disregarded. In the latter
+case, if the size of the set of data points still under consideration is
+sufficiently large, the next idle duration is not likely to be above the largest
+idle duration value still in that set, so that value is taken as the predicted
+next idle duration. Finally, if the set of data points still under
+consideration is too small, no prediction is made.
+
+If the preliminary prediction of the next idle duration computed this way is
+long enough, the governor obtains the time until the closest timer event with
+the assumption that the scheduler tick will be stopped. That time, referred to
+as the *sleep length* in what follows, is the upper bound on the time before the
+next CPU wakeup. It is used to determine the sleep length range, which in turn
+is needed to get the sleep length correction factor.
+
+The ``menu`` governor maintains an array containing several correction factor
+values that correspond to different sleep length ranges organized so that each
+range represented in the array is approximately 10 times wider than the previous
+one.
+
+The correction factor for the given sleep length range (determined before
+selecting the idle state for the CPU) is updated after the CPU has been woken
+up and the closer the sleep length is to the observed idle duration, the closer
+to 1 the correction factor becomes (it must fall between 0 and 1 inclusive).
+The sleep length is multiplied by the correction factor for the range that it
+falls into to obtain an approximation of the predicted idle duration that is
+compared to the "typical interval" determined previously and the minimum of
+the two is taken as the final idle duration prediction.
+
+If the "typical interval" value is small, which means that the CPU is likely
+to be woken up soon enough, the sleep length computation is skipped as it may
+be costly and the idle duration is simply predicted to equal the "typical
+interval" value.
Now, the governor is ready to walk the list of idle states and choose one of
them. For this purpose, it compares the target residency of each state with
-the predicted idle duration and the exit latency of it with the computed latency
-limit. It selects the state with the target residency closest to the predicted
+the predicted idle duration and the exit latency of it with the with the latency
+limit coming from the power management quality of service, or `PM QoS <cpu-pm-qos_>`_,
+framework. It selects the state with the target residency closest to the predicted
idle duration, but still below it, and exit latency that does not exceed the
limit.
@@ -585,6 +580,15 @@ the given CPU as the upper limit for the exit latency of the idle states that
they are allowed to select for that CPU. They should never select any idle
states with exit latency beyond that limit.
+While the above CPU QoS constraints apply to CPU idle time management, user
+space may also request a CPU system wakeup latency QoS limit, via the
+`cpu_wakeup_latency` file. This QoS constraint is respected when selecting a
+suitable idle state for the CPUs, while entering the system-wide suspend-to-idle
+sleep state, but also to the regular CPU idle time management.
+
+Note that, the management of the `cpu_wakeup_latency` file works according to
+the 'cpu_dma_latency' file from user space point of view. Moreover, the unit
+is also microseconds.
Idle States Control Via Kernel Command Line
===========================================
diff --git a/Documentation/admin-guide/pm/intel_idle.rst b/Documentation/admin-guide/pm/intel_idle.rst
index 39bd6ecce7de..ed6f055d4b14 100644
--- a/Documentation/admin-guide/pm/intel_idle.rst
+++ b/Documentation/admin-guide/pm/intel_idle.rst
@@ -38,6 +38,27 @@ instruction at all.
only way to pass early-configuration-time parameters to it is via the kernel
command line.
+Sysfs Interface
+===============
+
+The ``intel_idle`` driver exposes the following ``sysfs`` attributes in
+``/sys/devices/system/cpu/cpuidle/``:
+
+``intel_c1_demotion``
+ Enable or disable C1 demotion for all CPUs in the system. This file is
+ only exposed on platforms that support the C1 demotion feature and where
+ it was tested. Value 0 means that C1 demotion is disabled, value 1 means
+ that it is enabled. Write 0 or 1 to disable or enable C1 demotion for
+ all CPUs.
+
+ The C1 demotion feature involves the platform firmware demoting deep
+ C-state requests from the OS (e.g., C6 requests) to C1. The idea is that
+ firmware monitors CPU wake-up rate, and if it is higher than a
+ platform-specific threshold, the firmware demotes deep C-state requests
+ to C1. For example, Linux requests C6, but firmware noticed too many
+ wake-ups per second, and it keeps the CPU in C1. When the CPU stays in
+ C1 long enough, the platform promotes it back to C6. This may improve
+ some workloads' performance, but it may also increase power consumption.
.. _intel-idle-enumeration-of-states:
@@ -192,11 +213,19 @@ even if they have been enumerated (see :ref:`cpu-pm-qos` in
Documentation/admin-guide/pm/cpuidle.rst).
Setting ``max_cstate`` to 0 causes the ``intel_idle`` initialization to fail.
-The ``no_acpi`` and ``use_acpi`` module parameters (recognized by ``intel_idle``
-if the kernel has been configured with ACPI support) can be set to make the
-driver ignore the system's ACPI tables entirely or use them for all of the
-recognized processor models, respectively (they both are unset by default and
-``use_acpi`` has no effect if ``no_acpi`` is set).
+The ``no_acpi``, ``use_acpi`` and ``no_native`` module parameters are
+recognized by ``intel_idle`` if the kernel has been configured with ACPI
+support. In the case that ACPI is not configured these flags have no impact
+on functionality.
+
+``no_acpi`` - Do not use ACPI at all. Only native mode is available, no
+ACPI mode.
+
+``use_acpi`` - No-op in ACPI mode, the driver will consult ACPI tables for
+C-states on/off status in native mode.
+
+``no_native`` - Work only in ACPI mode, no native mode available (ignore
+all custom tables).
The value of the ``states_off`` module parameter (0 by default) represents a
list of idle states to be disabled by default in the form of a bitmask.
diff --git a/Documentation/admin-guide/pm/intel_pstate.rst b/Documentation/admin-guide/pm/intel_pstate.rst
index bf13ad25a32f..fde967b0c2e0 100644
--- a/Documentation/admin-guide/pm/intel_pstate.rst
+++ b/Documentation/admin-guide/pm/intel_pstate.rst
@@ -48,8 +48,9 @@ only way to pass early-configuration-time parameters to it is via the kernel
command line. However, its configuration can be adjusted via ``sysfs`` to a
great extent. In some configurations it even is possible to unregister it via
``sysfs`` which allows another ``CPUFreq`` scaling driver to be loaded and
-registered (see `below <status_attr_>`_).
+registered (see :ref:`below <status_attr>`).
+.. _operation_modes:
Operation Modes
===============
@@ -62,6 +63,8 @@ a certain performance scaling algorithm. Which of them will be in effect
depends on what kernel command line options are used and on the capabilities of
the processor.
+.. _active_mode:
+
Active Mode
-----------
@@ -94,6 +97,8 @@ Which of the P-state selection algorithms is used by default depends on the
Namely, if that option is set, the ``performance`` algorithm will be used by
default, and the other one will be used by default if it is not set.
+.. _active_mode_hwp:
+
Active Mode With HWP
~~~~~~~~~~~~~~~~~~~~
@@ -123,7 +128,7 @@ Energy-Performance Bias (EPB) knob (otherwise), which means that the processor's
internal P-state selection logic is expected to focus entirely on performance.
This will override the EPP/EPB setting coming from the ``sysfs`` interface
-(see `Energy vs Performance Hints`_ below). Moreover, any attempts to change
+(see :ref:`energy_performance_hints` below). Moreover, any attempts to change
the EPP/EPB to a value different from 0 ("performance") via ``sysfs`` in this
configuration will be rejected.
@@ -192,6 +197,8 @@ This is the default P-state selection algorithm if the
:c:macro:`CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE` kernel configuration option
is not set.
+.. _passive_mode:
+
Passive Mode
------------
@@ -289,12 +296,12 @@ Unlike ``_PSS`` objects in the ACPI tables, ``intel_pstate`` always exposes
the entire range of available P-states, including the whole turbo range, to the
``CPUFreq`` core and (in the passive mode) to generic scaling governors. This
generally causes turbo P-states to be set more often when ``intel_pstate`` is
-used relative to ACPI-based CPU performance scaling (see `below <acpi-cpufreq_>`_
-for more information).
+used relative to ACPI-based CPU performance scaling (see
+:ref:`below <acpi-cpufreq>` for more information).
Moreover, since ``intel_pstate`` always knows what the real turbo threshold is
(even if the Configurable TDP feature is enabled in the processor), its
-``no_turbo`` attribute in ``sysfs`` (described `below <no_turbo_attr_>`_) should
+``no_turbo`` attribute in ``sysfs`` (described :ref:`below <no_turbo_attr>`) should
work as expected in all cases (that is, if set to disable turbo P-states, it
always should prevent ``intel_pstate`` from using them).
@@ -307,12 +314,12 @@ pieces of information on it to be known, including:
* The minimum supported P-state.
- * The maximum supported `non-turbo P-state <turbo_>`_.
+ * The maximum supported :ref:`non-turbo P-state <turbo>`.
* Whether or not turbo P-states are supported at all.
- * The maximum supported `one-core turbo P-state <turbo_>`_ (if turbo P-states
- are supported).
+ * The maximum supported :ref:`one-core turbo P-state <turbo>` (if turbo
+ P-states are supported).
* The scaling formula to translate the driver's internal representation
of P-states into frequencies and the other way around.
@@ -329,9 +336,111 @@ information listed above is the same for all of the processors supporting the
HWP feature, which is why ``intel_pstate`` works with all of them.]
+Support for Hybrid Processors
+=============================
+
+Some processors supported by ``intel_pstate`` contain two or more types of CPU
+cores differing by the maximum turbo P-state, performance vs power characteristics,
+cache sizes, and possibly other properties. They are commonly referred to as
+hybrid processors. To support them, ``intel_pstate`` requires HWP to be enabled
+and it assumes the HWP performance units to be the same for all CPUs in the
+system, so a given HWP performance level always represents approximately the
+same physical performance regardless of the core (CPU) type.
+
+Hybrid Processors with SMT
+--------------------------
+
+On systems where SMT (Simultaneous Multithreading), also referred to as
+HyperThreading (HT) in the context of Intel processors, is enabled on at least
+one core, ``intel_pstate`` assigns performance-based priorities to CPUs. Namely,
+the priority of a given CPU reflects its highest HWP performance level which
+causes the CPU scheduler to generally prefer more performant CPUs, so the less
+performant CPUs are used when the other ones are fully loaded. However, SMT
+siblings (that is, logical CPUs sharing one physical core) are treated in a
+special way such that if one of them is in use, the effective priority of the
+other ones is lowered below the priorities of the CPUs located in the other
+physical cores.
+
+This approach maximizes performance in the majority of cases, but unfortunately
+it also leads to excessive energy usage in some important scenarios, like video
+playback, which is not generally desirable. While there is no other viable
+choice with SMT enabled because the effective capacity and utilization of SMT
+siblings are hard to determine, hybrid processors without SMT can be handled in
+more energy-efficient ways.
+
+.. _CAS:
+
+Capacity-Aware Scheduling Support
+---------------------------------
+
+The capacity-aware scheduling (CAS) support in the CPU scheduler is enabled by
+``intel_pstate`` by default on hybrid processors without SMT. CAS generally
+causes the scheduler to put tasks on a CPU so long as there is a sufficient
+amount of spare capacity on it, and if the utilization of a given task is too
+high for it, the task will need to go somewhere else.
+
+Since CAS takes CPU capacities into account, it does not require CPU
+prioritization and it allows tasks to be distributed more symmetrically among
+the more performant and less performant CPUs. Once placed on a CPU with enough
+capacity to accommodate it, a task may just continue to run there regardless of
+whether or not the other CPUs are fully loaded, so on average CAS reduces the
+utilization of the more performant CPUs which causes the energy usage to be more
+balanced because the more performant CPUs are generally less energy-efficient
+than the less performant ones.
+
+In order to use CAS, the scheduler needs to know the capacity of each CPU in
+the system and it needs to be able to compute scale-invariant utilization of
+CPUs, so ``intel_pstate`` provides it with the requisite information.
+
+First of all, the capacity of each CPU is represented by the ratio of its highest
+HWP performance level, multiplied by 1024, to the highest HWP performance level
+of the most performant CPU in the system, which works because the HWP performance
+units are the same for all CPUs. Second, the frequency-invariance computations,
+carried out by the scheduler to always express CPU utilization in the same units
+regardless of the frequency it is currently running at, are adjusted to take the
+CPU capacity into account. All of this happens when ``intel_pstate`` has
+registered itself with the ``CPUFreq`` core and it has figured out that it is
+running on a hybrid processor without SMT.
+
+Energy-Aware Scheduling Support
+-------------------------------
+
+If ``CONFIG_ENERGY_MODEL`` has been set during kernel configuration and
+``intel_pstate`` runs on a hybrid processor without SMT, in addition to enabling
+:ref:`CAS` it registers an Energy Model for the processor. This allows the
+Energy-Aware Scheduling (EAS) support to be enabled in the CPU scheduler if
+``schedutil`` is used as the ``CPUFreq`` governor which requires ``intel_pstate``
+to operate in the :ref:`passive mode <passive_mode>`.
+
+The Energy Model registered by ``intel_pstate`` is artificial (that is, it is
+based on abstract cost values and it does not include any real power numbers)
+and it is relatively simple to avoid unnecessary computations in the scheduler.
+There is a performance domain in it for every CPU in the system and the cost
+values for these performance domains have been chosen so that running a task on
+a less performant (small) CPU appears to be always cheaper than running that
+task on a more performant (big) CPU. However, for two CPUs of the same type,
+the cost difference depends on their current utilization, and the CPU whose
+current utilization is higher generally appears to be a more expensive
+destination for a given task. This helps to balance the load among CPUs of the
+same type.
+
+Since EAS works on top of CAS, high-utilization tasks are always migrated to
+CPUs with enough capacity to accommodate them, but thanks to EAS, low-utilization
+tasks tend to be placed on the CPUs that look less expensive to the scheduler.
+Effectively, this causes the less performant and less loaded CPUs to be
+preferred as long as they have enough spare capacity to run the given task
+which generally leads to reduced energy usage.
+
+The Energy Model created by ``intel_pstate`` can be inspected by looking at
+the ``energy_model`` directory in ``debugfs`` (typlically mounted on
+``/sys/kernel/debug/``).
+
+
User Space Interface in ``sysfs``
=================================
+.. _global_attributes:
+
Global Attributes
-----------------
@@ -344,8 +453,8 @@ argument is passed to the kernel in the command line.
``max_perf_pct``
Maximum P-state the driver is allowed to set in percent of the
- maximum supported performance level (the highest supported `turbo
- P-state <turbo_>`_).
+ maximum supported performance level (the highest supported :ref:`turbo
+ P-state <turbo>`).
This attribute will not be exposed if the
``intel_pstate=per_cpu_perf_limits`` argument is present in the kernel
@@ -353,8 +462,8 @@ argument is passed to the kernel in the command line.
``min_perf_pct``
Minimum P-state the driver is allowed to set in percent of the
- maximum supported performance level (the highest supported `turbo
- P-state <turbo_>`_).
+ maximum supported performance level (the highest supported :ref:`turbo
+ P-state <turbo>`).
This attribute will not be exposed if the
``intel_pstate=per_cpu_perf_limits`` argument is present in the kernel
@@ -363,18 +472,18 @@ argument is passed to the kernel in the command line.
``num_pstates``
Number of P-states supported by the processor (between 0 and 255
inclusive) including both turbo and non-turbo P-states (see
- `Turbo P-states Support`_).
+ :ref:`turbo`).
This attribute is present only if the value exposed by it is the same
for all of the CPUs in the system.
The value of this attribute is not affected by the ``no_turbo``
- setting described `below <no_turbo_attr_>`_.
+ setting described :ref:`below <no_turbo_attr>`.
This attribute is read-only.
``turbo_pct``
- Ratio of the `turbo range <turbo_>`_ size to the size of the entire
+ Ratio of the :ref:`turbo range <turbo>` size to the size of the entire
range of supported P-states, in percent.
This attribute is present only if the value exposed by it is the same
@@ -386,7 +495,7 @@ argument is passed to the kernel in the command line.
``no_turbo``
If set (equal to 1), the driver is not allowed to set any turbo P-states
- (see `Turbo P-states Support`_). If unset (equal to 0, which is the
+ (see :ref:`turbo`). If unset (equal to 0, which is the
default), turbo P-states can be set by the driver.
[Note that ``intel_pstate`` does not support the general ``boost``
attribute (supported by some other scaling drivers) which is replaced
@@ -395,11 +504,11 @@ argument is passed to the kernel in the command line.
This attribute does not affect the maximum supported frequency value
supplied to the ``CPUFreq`` core and exposed via the policy interface,
but it affects the maximum possible value of per-policy P-state limits
- (see `Interpretation of Policy Attributes`_ below for details).
+ (see :ref:`policy_attributes_interpretation` below for details).
``hwp_dynamic_boost``
This attribute is only present if ``intel_pstate`` works in the
- `active mode with the HWP feature enabled <Active Mode With HWP_>`_ in
+ :ref:`active mode with the HWP feature enabled <active_mode_hwp>` in
the processor. If set (equal to 1), it causes the minimum P-state limit
to be increased dynamically for a short time whenever a task previously
waiting on I/O is selected to run on a given logical CPU (the purpose
@@ -414,12 +523,12 @@ argument is passed to the kernel in the command line.
Operation mode of the driver: "active", "passive" or "off".
"active"
- The driver is functional and in the `active mode
- <Active Mode_>`_.
+ The driver is functional and in the :ref:`active mode
+ <active_mode>`.
"passive"
- The driver is functional and in the `passive mode
- <Passive Mode_>`_.
+ The driver is functional and in the :ref:`passive mode
+ <passive_mode>`.
"off"
The driver is not functional (it is not registered as a scaling
@@ -447,13 +556,15 @@ argument is passed to the kernel in the command line.
attribute to "1" enables the energy-efficiency optimizations and setting
to "0" disables them.
+.. _policy_attributes_interpretation:
+
Interpretation of Policy Attributes
-----------------------------------
The interpretation of some ``CPUFreq`` policy attributes described in
Documentation/admin-guide/pm/cpufreq.rst is special with ``intel_pstate``
as the current scaling driver and it generally depends on the driver's
-`operation mode <Operation Modes_>`_.
+:ref:`operation mode <operation_modes>`.
First of all, the values of the ``cpuinfo_max_freq``, ``cpuinfo_min_freq`` and
``scaling_cur_freq`` attributes are produced by applying a processor-specific
@@ -462,9 +573,10 @@ Also, the values of the ``scaling_max_freq`` and ``scaling_min_freq``
attributes are capped by the frequency corresponding to the maximum P-state that
the driver is allowed to set.
-If the ``no_turbo`` `global attribute <no_turbo_attr_>`_ is set, the driver is
-not allowed to use turbo P-states, so the maximum value of ``scaling_max_freq``
-and ``scaling_min_freq`` is limited to the maximum non-turbo P-state frequency.
+If the ``no_turbo`` :ref:`global attribute <no_turbo_attr>` is set, the driver
+is not allowed to use turbo P-states, so the maximum value of
+``scaling_max_freq`` and ``scaling_min_freq`` is limited to the maximum
+non-turbo P-state frequency.
Accordingly, setting ``no_turbo`` causes ``scaling_max_freq`` and
``scaling_min_freq`` to go down to that value if they were above it before.
However, the old values of ``scaling_max_freq`` and ``scaling_min_freq`` will be
@@ -476,7 +588,7 @@ and ``scaling_min_freq`` corresponds to the maximum supported turbo P-state,
which also is the value of ``cpuinfo_max_freq`` in either case.
Next, the following policy attributes have special meaning if
-``intel_pstate`` works in the `active mode <Active Mode_>`_:
+``intel_pstate`` works in the :ref:`active mode <active_mode>`:
``scaling_available_governors``
List of P-state selection algorithms provided by ``intel_pstate``.
@@ -497,20 +609,22 @@ processor:
Shows the base frequency of the CPU. Any frequency above this will be
in the turbo frequency range.
-The meaning of these attributes in the `passive mode <Passive Mode_>`_ is the
+The meaning of these attributes in the :ref:`passive mode <passive_mode>` is the
same as for other scaling drivers.
Additionally, the value of the ``scaling_driver`` attribute for ``intel_pstate``
depends on the operation mode of the driver. Namely, it is either
-"intel_pstate" (in the `active mode <Active Mode_>`_) or "intel_cpufreq" (in the
-`passive mode <Passive Mode_>`_).
+"intel_pstate" (in the :ref:`active mode <active_mode>`) or "intel_cpufreq"
+(in the :ref:`passive mode <passive_mode>`).
+
+.. _pstate_limits_coordination:
Coordination of P-State Limits
------------------------------
``intel_pstate`` allows P-state limits to be set in two ways: with the help of
-the ``max_perf_pct`` and ``min_perf_pct`` `global attributes
-<Global Attributes_>`_ or via the ``scaling_max_freq`` and ``scaling_min_freq``
+the ``max_perf_pct`` and ``min_perf_pct`` :ref:`global attributes
+<global_attributes>` or via the ``scaling_max_freq`` and ``scaling_min_freq``
``CPUFreq`` policy attributes. The coordination between those limits is based
on the following rules, regardless of the current operation mode of the driver:
@@ -532,17 +646,18 @@ on the following rules, regardless of the current operation mode of the driver:
3. The global and per-policy limits can be set independently.
-In the `active mode with the HWP feature enabled <Active Mode With HWP_>`_, the
+In the :ref:`active mode with the HWP feature enabled <active_mode_hwp>`, the
resulting effective values are written into hardware registers whenever the
limits change in order to request its internal P-state selection logic to always
set P-states within these limits. Otherwise, the limits are taken into account
-by scaling governors (in the `passive mode <Passive Mode_>`_) and by the driver
-every time before setting a new P-state for a CPU.
+by scaling governors (in the :ref:`passive mode <passive_mode>`) and by the
+driver every time before setting a new P-state for a CPU.
Additionally, if the ``intel_pstate=per_cpu_perf_limits`` command line argument
is passed to the kernel, ``max_perf_pct`` and ``min_perf_pct`` are not exposed
at all and the only way to set the limits is by using the policy attributes.
+.. _energy_performance_hints:
Energy vs Performance Hints
---------------------------
@@ -602,9 +717,9 @@ output.
On those systems each ``_PSS`` object returns a list of P-states supported by
the corresponding CPU which basically is a subset of the P-states range that can
be used by ``intel_pstate`` on the same system, with one exception: the whole
-`turbo range <turbo_>`_ is represented by one item in it (the topmost one). By
-convention, the frequency returned by ``_PSS`` for that item is greater by 1 MHz
-than the frequency of the highest non-turbo P-state listed by it, but the
+:ref:`turbo range <turbo>` is represented by one item in it (the topmost one).
+By convention, the frequency returned by ``_PSS`` for that item is greater by
+1 MHz than the frequency of the highest non-turbo P-state listed by it, but the
corresponding P-state representation (following the hardware specification)
returned for it matches the maximum supported turbo P-state (or is the
special value 255 meaning essentially "go as high as you can get").
@@ -630,18 +745,18 @@ benefit from running at turbo frequencies will be given non-turbo P-states
instead.
One more issue related to that may appear on systems supporting the
-`Configurable TDP feature <turbo_>`_ allowing the platform firmware to set the
-turbo threshold. Namely, if that is not coordinated with the lists of P-states
-returned by ``_PSS`` properly, there may be more than one item corresponding to
-a turbo P-state in those lists and there may be a problem with avoiding the
-turbo range (if desirable or necessary). Usually, to avoid using turbo
-P-states overall, ``acpi-cpufreq`` simply avoids using the topmost state listed
-by ``_PSS``, but that is not sufficient when there are other turbo P-states in
-the list returned by it.
+:ref:`Configurable TDP feature <turbo>` allowing the platform firmware to set
+the turbo threshold. Namely, if that is not coordinated with the lists of
+P-states returned by ``_PSS`` properly, there may be more than one item
+corresponding to a turbo P-state in those lists and there may be a problem with
+avoiding the turbo range (if desirable or necessary). Usually, to avoid using
+turbo P-states overall, ``acpi-cpufreq`` simply avoids using the topmost state
+listed by ``_PSS``, but that is not sufficient when there are other turbo
+P-states in the list returned by it.
Apart from the above, ``acpi-cpufreq`` works like ``intel_pstate`` in the
-`passive mode <Passive Mode_>`_, except that the number of P-states it can set
-is limited to the ones listed by the ACPI ``_PSS`` objects.
+:ref:`passive mode <passive_mode>`, except that the number of P-states it can
+set is limited to the ones listed by the ACPI ``_PSS`` objects.
Kernel Command Line Options for ``intel_pstate``
@@ -656,11 +771,11 @@ of them have to be prepended with the ``intel_pstate=`` prefix.
processor is supported by it.
``active``
- Register ``intel_pstate`` in the `active mode <Active Mode_>`_ to start
- with.
+ Register ``intel_pstate`` in the :ref:`active mode <active_mode>` to
+ start with.
``passive``
- Register ``intel_pstate`` in the `passive mode <Passive Mode_>`_ to
+ Register ``intel_pstate`` in the :ref:`passive mode <passive_mode>` to
start with.
``force``
@@ -693,9 +808,12 @@ of them have to be prepended with the ``intel_pstate=`` prefix.
and this option has no effect.
``per_cpu_perf_limits``
- Use per-logical-CPU P-State limits (see `Coordination of P-state
- Limits`_ for details).
+ Use per-logical-CPU P-State limits (see
+ :ref:`pstate_limits_coordination` for details).
+``no_cas``
+ Do not enable :ref:`capacity-aware scheduling <CAS>` which is enabled
+ by default on hybrid systems without SMT.
Diagnostics and Tuning
======================
@@ -707,7 +825,7 @@ There are two static trace events that can be used for ``intel_pstate``
diagnostics. One of them is the ``cpu_frequency`` trace event generally used
by ``CPUFreq``, and the other one is the ``pstate_sample`` trace event specific
to ``intel_pstate``. Both of them are triggered by ``intel_pstate`` only if
-it works in the `active mode <Active Mode_>`_.
+it works in the :ref:`active mode <active_mode>`.
The following sequence of shell commands can be used to enable them and see
their output (if the kernel is generally configured to support event tracing)::
@@ -719,7 +837,7 @@ their output (if the kernel is generally configured to support event tracing)::
gnome-terminal--4510 [001] ..s. 1177.680733: pstate_sample: core_busy=107 scaled=94 from=26 to=26 mperf=1143818 aperf=1230607 tsc=29838618 freq=2474476
cat-5235 [002] ..s. 1177.681723: cpu_frequency: state=2900000 cpu_id=2
-If ``intel_pstate`` works in the `passive mode <Passive Mode_>`_, the
+If ``intel_pstate`` works in the :ref:`passive mode <passive_mode>`, the
``cpu_frequency`` trace event will be triggered either by the ``schedutil``
scaling governor (for the policies it is attached to), or by the ``CPUFreq``
core (for the policies with other scaling governors).
diff --git a/Documentation/admin-guide/pm/intel_uncore_frequency_scaling.rst b/Documentation/admin-guide/pm/intel_uncore_frequency_scaling.rst
index 5ab3440e6cee..d367ba4d744a 100644
--- a/Documentation/admin-guide/pm/intel_uncore_frequency_scaling.rst
+++ b/Documentation/admin-guide/pm/intel_uncore_frequency_scaling.rst
@@ -91,12 +91,22 @@ Attributes in each directory:
``domain_id``
This attribute is used to get the power domain id of this instance.
+``die_id``
+ This attribute is used to get the Linux die id of this instance.
+ This attribute is only present for domains with core agents and
+ when the CPUID leaf 0x1f presents die ID.
+
``fabric_cluster_id``
This attribute is used to get the fabric cluster id of this instance.
``package_id``
This attribute is used to get the package id of this instance.
+``agent_types``
+ This attribute displays all the hardware agents present within the
+ domain. Each agent has the capability to control one or more hardware
+ subsystems, which include: core, cache, memory, and I/O.
+
The other attributes are same as presented at package_*_die_* level.
In most of current use cases, the "max_freq_khz" and "min_freq_khz"
@@ -113,3 +123,62 @@ to apply at each uncore* level.
Support for "current_freq_khz" is available only at each fabric cluster
level (i.e., in uncore* directory).
+
+Efficiency vs. Latency Tradeoff
+-------------------------------
+
+The Efficiency Latency Control (ELC) feature improves performance
+per watt. With this feature hardware power management algorithms
+optimize trade-off between latency and power consumption. For some
+latency sensitive workloads further tuning can be done by SW to
+get desired performance.
+
+The hardware monitors the average CPU utilization across all cores
+in a power domain at regular intervals and decides an uncore frequency.
+While this may result in the best performance per watt, workload may be
+expecting higher performance at the expense of power. Consider an
+application that intermittently wakes up to perform memory reads on an
+otherwise idle system. In such cases, if hardware lowers uncore
+frequency, then there may be delay in ramp up of frequency to meet
+target performance.
+
+The ELC control defines some parameters which can be changed from SW.
+If the average CPU utilization is below a user-defined threshold
+(elc_low_threshold_percent attribute below), the user-defined uncore
+floor frequency will be used (elc_floor_freq_khz attribute below)
+instead of hardware calculated minimum.
+
+Similarly in high load scenario where the CPU utilization goes above
+the high threshold value (elc_high_threshold_percent attribute below)
+instead of jumping to maximum uncore frequency, frequency is increased
+in 100MHz steps. This avoids consuming unnecessarily high power
+immediately with CPU utilization spikes.
+
+Attributes for efficiency latency control:
+
+``elc_floor_freq_khz``
+ This attribute is used to get/set the efficiency latency floor frequency.
+ If this variable is lower than the 'min_freq_khz', it is ignored by
+ the firmware.
+
+``elc_low_threshold_percent``
+ This attribute is used to get/set the efficiency latency control low
+ threshold. This attribute is in percentages of CPU utilization.
+
+``elc_high_threshold_percent``
+ This attribute is used to get/set the efficiency latency control high
+ threshold. This attribute is in percentages of CPU utilization.
+
+``elc_high_threshold_enable``
+ This attribute is used to enable/disable the efficiency latency control
+ high threshold. Write '1' to enable, '0' to disable.
+
+Example system configuration below, which does following:
+ * when CPU utilization is less than 10%: sets uncore frequency to 800MHz
+ * when CPU utilization is higher than 95%: increases uncore frequency in
+ 100MHz steps, until power limit is reached
+
+ elc_floor_freq_khz:800000
+ elc_high_threshold_percent:95
+ elc_high_threshold_enable:1
+ elc_low_threshold_percent:10
diff --git a/Documentation/admin-guide/pmf.rst b/Documentation/admin-guide/pmf.rst
deleted file mode 100644
index 9ee729ffc19b..000000000000
--- a/Documentation/admin-guide/pmf.rst
+++ /dev/null
@@ -1,24 +0,0 @@
-.. SPDX-License-Identifier: GPL-2.0
-
-Set udev rules for PMF Smart PC Builder
----------------------------------------
-
-AMD PMF(Platform Management Framework) Smart PC Solution builder has to set the system states
-like S0i3, Screen lock, hibernate etc, based on the output actions provided by the PMF
-TA (Trusted Application).
-
-In order for this to work the PMF driver generates a uevent for userspace to react to. Below are
-sample udev rules that can facilitate this experience when a machine has PMF Smart PC solution builder
-enabled.
-
-Please add the following line(s) to
-``/etc/udev/rules.d/99-local.rules``::
-
- DRIVERS=="amd-pmf", ACTION=="change", ENV{EVENT_ID}=="0", RUN+="/usr/bin/systemctl suspend"
- DRIVERS=="amd-pmf", ACTION=="change", ENV{EVENT_ID}=="1", RUN+="/usr/bin/systemctl hibernate"
- DRIVERS=="amd-pmf", ACTION=="change", ENV{EVENT_ID}=="2", RUN+="/bin/loginctl lock-sessions"
-
-EVENT_ID values:
-0= Put the system to S0i3/S2Idle
-1= Put the system to hibernate
-2= Lock the screen
diff --git a/Documentation/admin-guide/pnp.rst b/Documentation/admin-guide/pnp.rst
index 3eda08191d13..24d80e3eb309 100644
--- a/Documentation/admin-guide/pnp.rst
+++ b/Documentation/admin-guide/pnp.rst
@@ -129,9 +129,6 @@ pnp_put_protocol
pnp_register_protocol
use this to register a new PnP protocol
-pnp_unregister_protocol
- use this function to remove a PnP protocol from the Plug and Play Layer
-
pnp_register_driver
adds a PnP driver to the Plug and Play Layer
diff --git a/Documentation/admin-guide/quickly-build-trimmed-linux.rst b/Documentation/admin-guide/quickly-build-trimmed-linux.rst
index f08149bc53f8..cb4b78468a93 100644
--- a/Documentation/admin-guide/quickly-build-trimmed-linux.rst
+++ b/Documentation/admin-guide/quickly-build-trimmed-linux.rst
@@ -273,7 +273,7 @@ again.
does nothing at all; in that case you have to manually install your kernel,
as outlined in the reference section.
- If you are running a immutable Linux distribution, check its documentation
+ If you are running an immutable Linux distribution, check its documentation
and the web to find out how to install your own kernel there.
[:ref:`details<install>`]
@@ -347,7 +347,7 @@ again.
[:ref:`details<uninstall>`]
-.. _submit_improvements:
+.. _submit_improvements_qbtl:
Did you run into trouble following any of the above steps that is not cleared up
by the reference section below? Or do you have ideas how to improve the text?
@@ -733,7 +733,7 @@ can easily happen that your self-built kernel will lack modules for tasks you
did not perform before utilizing this make target. That's because those tasks
require kernel modules that are normally autoloaded when you perform that task
for the first time; if you didn't perform that task at least once before using
-localmodonfig, the latter will thus assume these modules are superfluous and
+localmodconfig, the latter will thus assume these modules are superfluous and
disable them.
You can try to avoid this by performing typical tasks that often will autoload
@@ -884,7 +884,7 @@ When a build error occurs, it might be caused by some aspect of your machine's
setup that often can be fixed quickly; other times though the problem lies in
the code and can only be fixed by a developer. A close examination of the
failure messages coupled with some research on the internet will often tell you
-which of the two it is. To perform such a investigation, restart the build
+which of the two it is. To perform such an investigation, restart the build
process like this::
make V=1
@@ -1070,7 +1070,7 @@ complicated, and harder to follow.
That being said: this of course is a balancing act. Hence, if you think an
additional use-case is worth describing, suggest it to the maintainers of this
-document, as :ref:`described above <submit_improvements>`.
+document, as :ref:`described above <submit_improvements_qbtl>`.
..
diff --git a/Documentation/admin-guide/ramoops.rst b/Documentation/admin-guide/ramoops.rst
index e9f85142182d..2eabef31220d 100644
--- a/Documentation/admin-guide/ramoops.rst
+++ b/Documentation/admin-guide/ramoops.rst
@@ -23,6 +23,8 @@ and type of the memory area are set using three variables:
* ``mem_size`` for the size. The memory size will be rounded down to a
power of two.
* ``mem_type`` to specify if the memory type (default is pgprot_writecombine).
+ * ``mem_name`` to specify a memory region defined by ``reserve_mem`` command
+ line parameter.
Typically the default value of ``mem_type=0`` should be used as that sets the pstore
mapping to pgprot_writecombine. Setting ``mem_type=1`` attempts to use
@@ -118,6 +120,17 @@ Setting the ramoops parameters can be done in several different manners:
return ret;
}
+ D. Using a region of memory reserved via ``reserve_mem`` command line
+ parameter. The address and size will be defined by the ``reserve_mem``
+ parameter. Note, that ``reserve_mem`` may not always allocate memory
+ in the same location, and cannot be relied upon. Testing will need
+ to be done, and it may not work on every machine, nor every kernel.
+ Consider this a "best effort" approach. The ``reserve_mem`` option
+ takes a size, alignment and name as arguments. The name is used
+ to map the memory to a label that can be retrieved by ramoops.
+
+ reserve_mem=2M:4096:oops ramoops.mem_name=oops
+
You can specify either RAM memory or peripheral devices' memory. However, when
specifying RAM, be sure to reserve the memory by issuing memblock_reserve()
very early in the architecture code, e.g.::
diff --git a/Documentation/admin-guide/ras.rst b/Documentation/admin-guide/ras.rst
deleted file mode 100644
index 8e03751d126d..000000000000
--- a/Documentation/admin-guide/ras.rst
+++ /dev/null
@@ -1,1219 +0,0 @@
-.. include:: <isonum.txt>
-
-============================================
-Reliability, Availability and Serviceability
-============================================
-
-RAS concepts
-************
-
-Reliability, Availability and Serviceability (RAS) is a concept used on
-servers meant to measure their robustness.
-
-Reliability
- is the probability that a system will produce correct outputs.
-
- * Generally measured as Mean Time Between Failures (MTBF)
- * Enhanced by features that help to avoid, detect and repair hardware faults
-
-Availability
- is the probability that a system is operational at a given time
-
- * Generally measured as a percentage of downtime per a period of time
- * Often uses mechanisms to detect and correct hardware faults in
- runtime;
-
-Serviceability (or maintainability)
- is the simplicity and speed with which a system can be repaired or
- maintained
-
- * Generally measured on Mean Time Between Repair (MTBR)
-
-Improving RAS
--------------
-
-In order to reduce systems downtime, a system should be capable of detecting
-hardware errors, and, when possible correcting them in runtime. It should
-also provide mechanisms to detect hardware degradation, in order to warn
-the system administrator to take the action of replacing a component before
-it causes data loss or system downtime.
-
-Among the monitoring measures, the most usual ones include:
-
-* CPU – detect errors at instruction execution and at L1/L2/L3 caches;
-* Memory – add error correction logic (ECC) to detect and correct errors;
-* I/O – add CRC checksums for transferred data;
-* Storage – RAID, journal file systems, checksums,
- Self-Monitoring, Analysis and Reporting Technology (SMART).
-
-By monitoring the number of occurrences of error detections, it is possible
-to identify if the probability of hardware errors is increasing, and, on such
-case, do a preventive maintenance to replace a degraded component while
-those errors are correctable.
-
-Types of errors
----------------
-
-Most mechanisms used on modern systems use technologies like Hamming
-Codes that allow error correction when the number of errors on a bit packet
-is below a threshold. If the number of errors is above, those mechanisms
-can indicate with a high degree of confidence that an error happened, but
-they can't correct.
-
-Also, sometimes an error occur on a component that it is not used. For
-example, a part of the memory that it is not currently allocated.
-
-That defines some categories of errors:
-
-* **Correctable Error (CE)** - the error detection mechanism detected and
- corrected the error. Such errors are usually not fatal, although some
- Kernel mechanisms allow the system administrator to consider them as fatal.
-
-* **Uncorrected Error (UE)** - the amount of errors happened above the error
- correction threshold, and the system was unable to auto-correct.
-
-* **Fatal Error** - when an UE error happens on a critical component of the
- system (for example, a piece of the Kernel got corrupted by an UE), the
- only reliable way to avoid data corruption is to hang or reboot the machine.
-
-* **Non-fatal Error** - when an UE error happens on an unused component,
- like a CPU in power down state or an unused memory bank, the system may
- still run, eventually replacing the affected hardware by a hot spare,
- if available.
-
- Also, when an error happens on a userspace process, it is also possible to
- kill such process and let userspace restart it.
-
-The mechanism for handling non-fatal errors is usually complex and may
-require the help of some userspace application, in order to apply the
-policy desired by the system administrator.
-
-Identifying a bad hardware component
-------------------------------------
-
-Just detecting a hardware flaw is usually not enough, as the system needs
-to pinpoint to the minimal replaceable unit (MRU) that should be exchanged
-to make the hardware reliable again.
-
-So, it requires not only error logging facilities, but also mechanisms that
-will translate the error message to the silkscreen or component label for
-the MRU.
-
-Typically, it is very complex for memory, as modern CPUs interlace memory
-from different memory modules, in order to provide a better performance. The
-DMI BIOS usually have a list of memory module labels, with can be obtained
-using the ``dmidecode`` tool. For example, on a desktop machine, it shows::
-
- Memory Device
- Total Width: 64 bits
- Data Width: 64 bits
- Size: 16384 MB
- Form Factor: SODIMM
- Set: None
- Locator: ChannelA-DIMM0
- Bank Locator: BANK 0
- Type: DDR4
- Type Detail: Synchronous
- Speed: 2133 MHz
- Rank: 2
- Configured Clock Speed: 2133 MHz
-
-On the above example, a DDR4 SO-DIMM memory module is located at the
-system's memory labeled as "BANK 0", as given by the *bank locator* field.
-Please notice that, on such system, the *total width* is equal to the
-*data width*. It means that such memory module doesn't have error
-detection/correction mechanisms.
-
-Unfortunately, not all systems use the same field to specify the memory
-bank. On this example, from an older server, ``dmidecode`` shows::
-
- Memory Device
- Array Handle: 0x1000
- Error Information Handle: Not Provided
- Total Width: 72 bits
- Data Width: 64 bits
- Size: 8192 MB
- Form Factor: DIMM
- Set: 1
- Locator: DIMM_A1
- Bank Locator: Not Specified
- Type: DDR3
- Type Detail: Synchronous Registered (Buffered)
- Speed: 1600 MHz
- Rank: 2
- Configured Clock Speed: 1600 MHz
-
-There, the DDR3 RDIMM memory module is located at the system's memory labeled
-as "DIMM_A1", as given by the *locator* field. Please notice that this
-memory module has 64 bits of *data width* and 72 bits of *total width*. So,
-it has 8 extra bits to be used by error detection and correction mechanisms.
-Such kind of memory is called Error-correcting code memory (ECC memory).
-
-To make things even worse, it is not uncommon that systems with different
-labels on their system's board to use exactly the same BIOS, meaning that
-the labels provided by the BIOS won't match the real ones.
-
-ECC memory
-----------
-
-As mentioned in the previous section, ECC memory has extra bits to be
-used for error correction. In the above example, a memory module has
-64 bits of *data width*, and 72 bits of *total width*. The extra 8
-bits which are used for the error detection and correction mechanisms
-are referred to as the *syndrome*\ [#f1]_\ [#f2]_.
-
-So, when the cpu requests the memory controller to write a word with
-*data width*, the memory controller calculates the *syndrome* in real time,
-using Hamming code, or some other error correction code, like SECDED+,
-producing a code with *total width* size. Such code is then written
-on the memory modules.
-
-At read, the *total width* bits code is converted back, using the same
-ECC code used on write, producing a word with *data width* and a *syndrome*.
-The word with *data width* is sent to the CPU, even when errors happen.
-
-The memory controller also looks at the *syndrome* in order to check if
-there was an error, and if the ECC code was able to fix such error.
-If the error was corrected, a Corrected Error (CE) happened. If not, an
-Uncorrected Error (UE) happened.
-
-The information about the CE/UE errors is stored on some special registers
-at the memory controller and can be accessed by reading such registers,
-either by BIOS, by some special CPUs or by Linux EDAC driver. On x86 64
-bit CPUs, such errors can also be retrieved via the Machine Check
-Architecture (MCA)\ [#f3]_.
-
-.. [#f1] Please notice that several memory controllers allow operation on a
- mode called "Lock-Step", where it groups two memory modules together,
- doing 128-bit reads/writes. That gives 16 bits for error correction, with
- significantly improves the error correction mechanism, at the expense
- that, when an error happens, there's no way to know what memory module is
- to blame. So, it has to blame both memory modules.
-
-.. [#f2] Some memory controllers also allow using memory in mirror mode.
- On such mode, the same data is written to two memory modules. At read,
- the system checks both memory modules, in order to check if both provide
- identical data. On such configuration, when an error happens, there's no
- way to know what memory module is to blame. So, it has to blame both
- memory modules (or 4 memory modules, if the system is also on Lock-step
- mode).
-
-.. [#f3] For more details about the Machine Check Architecture (MCA),
- please read Documentation/arch/x86/x86_64/machinecheck.rst at the Kernel tree.
-
-EDAC - Error Detection And Correction
-*************************************
-
-.. note::
-
- "bluesmoke" was the name for this device driver subsystem when it
- was "out-of-tree" and maintained at http://bluesmoke.sourceforge.net.
- That site is mostly archaic now and can be used only for historical
- purposes.
-
- When the subsystem was pushed upstream for the first time, on
- Kernel 2.6.16, it was renamed to ``EDAC``.
-
-Purpose
--------
-
-The ``edac`` kernel module's goal is to detect and report hardware errors
-that occur within the computer system running under linux.
-
-Memory
-------
-
-Memory Correctable Errors (CE) and Uncorrectable Errors (UE) are the
-primary errors being harvested. These types of errors are harvested by
-the ``edac_mc`` device.
-
-Detecting CE events, then harvesting those events and reporting them,
-**can** but must not necessarily be a predictor of future UE events. With
-CE events only, the system can and will continue to operate as no data
-has been damaged yet.
-
-However, preventive maintenance and proactive part replacement of memory
-modules exhibiting CEs can reduce the likelihood of the dreaded UE events
-and system panics.
-
-Other hardware elements
------------------------
-
-A new feature for EDAC, the ``edac_device`` class of device, was added in
-the 2.6.23 version of the kernel.
-
-This new device type allows for non-memory type of ECC hardware detectors
-to have their states harvested and presented to userspace via the sysfs
-interface.
-
-Some architectures have ECC detectors for L1, L2 and L3 caches,
-along with DMA engines, fabric switches, main data path switches,
-interconnections, and various other hardware data paths. If the hardware
-reports it, then a edac_device device probably can be constructed to
-harvest and present that to userspace.
-
-
-PCI bus scanning
-----------------
-
-In addition, PCI devices are scanned for PCI Bus Parity and SERR Errors
-in order to determine if errors are occurring during data transfers.
-
-The presence of PCI Parity errors must be examined with a grain of salt.
-There are several add-in adapters that do **not** follow the PCI specification
-with regards to Parity generation and reporting. The specification says
-the vendor should tie the parity status bits to 0 if they do not intend
-to generate parity. Some vendors do not do this, and thus the parity bit
-can "float" giving false positives.
-
-There is a PCI device attribute located in sysfs that is checked by
-the EDAC PCI scanning code. If that attribute is set, PCI parity/error
-scanning is skipped for that device. The attribute is::
-
- broken_parity_status
-
-and is located in ``/sys/devices/pci<XXX>/0000:XX:YY.Z`` directories for
-PCI devices.
-
-
-Versioning
-----------
-
-EDAC is composed of a "core" module (``edac_core.ko``) and several Memory
-Controller (MC) driver modules. On a given system, the CORE is loaded
-and one MC driver will be loaded. Both the CORE and the MC driver (or
-``edac_device`` driver) have individual versions that reflect current
-release level of their respective modules.
-
-Thus, to "report" on what version a system is running, one must report
-both the CORE's and the MC driver's versions.
-
-
-Loading
--------
-
-If ``edac`` was statically linked with the kernel then no loading
-is necessary. If ``edac`` was built as modules then simply modprobe
-the ``edac`` pieces that you need. You should be able to modprobe
-hardware-specific modules and have the dependencies load the necessary
-core modules.
-
-Example::
-
- $ modprobe amd76x_edac
-
-loads both the ``amd76x_edac.ko`` memory controller module and the
-``edac_mc.ko`` core module.
-
-
-Sysfs interface
----------------
-
-EDAC presents a ``sysfs`` interface for control and reporting purposes. It
-lives in the /sys/devices/system/edac directory.
-
-Within this directory there currently reside 2 components:
-
- ======= ==============================
- mc memory controller(s) system
- pci PCI control and status system
- ======= ==============================
-
-
-
-Memory Controller (mc) Model
-----------------------------
-
-Each ``mc`` device controls a set of memory modules [#f4]_. These modules
-are laid out in a Chip-Select Row (``csrowX``) and Channel table (``chX``).
-There can be multiple csrows and multiple channels.
-
-.. [#f4] Nowadays, the term DIMM (Dual In-line Memory Module) is widely
- used to refer to a memory module, although there are other memory
- packaging alternatives, like SO-DIMM, SIMM, etc. The UEFI
- specification (Version 2.7) defines a memory module in the Common
- Platform Error Record (CPER) section to be an SMBIOS Memory Device
- (Type 17). Along this document, and inside the EDAC subsystem, the term
- "dimm" is used for all memory modules, even when they use a
- different kind of packaging.
-
-Memory controllers allow for several csrows, with 8 csrows being a
-typical value. Yet, the actual number of csrows depends on the layout of
-a given motherboard, memory controller and memory module characteristics.
-
-Dual channels allow for dual data length (e. g. 128 bits, on 64 bit systems)
-data transfers to/from the CPU from/to memory. Some newer chipsets allow
-for more than 2 channels, like Fully Buffered DIMMs (FB-DIMMs) memory
-controllers. The following example will assume 2 channels:
-
- +------------+-----------------------+
- | CS Rows | Channels |
- +------------+-----------+-----------+
- | | ``ch0`` | ``ch1`` |
- +============+===========+===========+
- | |**DIMM_A0**|**DIMM_B0**|
- +------------+-----------+-----------+
- | ``csrow0`` | rank0 | rank0 |
- +------------+-----------+-----------+
- | ``csrow1`` | rank1 | rank1 |
- +------------+-----------+-----------+
- | |**DIMM_A1**|**DIMM_B1**|
- +------------+-----------+-----------+
- | ``csrow2`` | rank0 | rank0 |
- +------------+-----------+-----------+
- | ``csrow3`` | rank1 | rank1 |
- +------------+-----------+-----------+
-
-In the above example, there are 4 physical slots on the motherboard
-for memory DIMMs:
-
- +---------+---------+
- | DIMM_A0 | DIMM_B0 |
- +---------+---------+
- | DIMM_A1 | DIMM_B1 |
- +---------+---------+
-
-Labels for these slots are usually silk-screened on the motherboard.
-Slots labeled ``A`` are channel 0 in this example. Slots labeled ``B`` are
-channel 1. Notice that there are two csrows possible on a physical DIMM.
-These csrows are allocated their csrow assignment based on the slot into
-which the memory DIMM is placed. Thus, when 1 DIMM is placed in each
-Channel, the csrows cross both DIMMs.
-
-Memory DIMMs come single or dual "ranked". A rank is a populated csrow.
-In the example above 2 dual ranked DIMMs are similarly placed. Thus,
-both csrow0 and csrow1 are populated. On the other hand, when 2 single
-ranked DIMMs are placed in slots DIMM_A0 and DIMM_B0, then they will
-have just one csrow (csrow0) and csrow1 will be empty. The pattern
-repeats itself for csrow2 and csrow3. Also note that some memory
-controllers don't have any logic to identify the memory module, see
-``rankX`` directories below.
-
-The representation of the above is reflected in the directory
-tree in EDAC's sysfs interface. Starting in directory
-``/sys/devices/system/edac/mc``, each memory controller will be
-represented by its own ``mcX`` directory, where ``X`` is the
-index of the MC::
-
- ..../edac/mc/
- |
- |->mc0
- |->mc1
- |->mc2
- ....
-
-Under each ``mcX`` directory each ``csrowX`` is again represented by a
-``csrowX``, where ``X`` is the csrow index::
-
- .../mc/mc0/
- |
- |->csrow0
- |->csrow2
- |->csrow3
- ....
-
-Notice that there is no csrow1, which indicates that csrow0 is composed
-of a single ranked DIMMs. This should also apply in both Channels, in
-order to have dual-channel mode be operational. Since both csrow2 and
-csrow3 are populated, this indicates a dual ranked set of DIMMs for
-channels 0 and 1.
-
-Within each of the ``mcX`` and ``csrowX`` directories are several EDAC
-control and attribute files.
-
-``mcX`` directories
--------------------
-
-In ``mcX`` directories are EDAC control and attribute files for
-this ``X`` instance of the memory controllers.
-
-For a description of the sysfs API, please see:
-
- Documentation/ABI/testing/sysfs-devices-edac
-
-
-``dimmX`` or ``rankX`` directories
-----------------------------------
-
-The recommended way to use the EDAC subsystem is to look at the information
-provided by the ``dimmX`` or ``rankX`` directories [#f5]_.
-
-A typical EDAC system has the following structure under
-``/sys/devices/system/edac/``\ [#f6]_::
-
- /sys/devices/system/edac/
- ├── mc
- │   ├── mc0
- │   │   ├── ce_count
- │   │   ├── ce_noinfo_count
- │   │   ├── dimm0
- │   │   │   ├── dimm_ce_count
- │   │   │   ├── dimm_dev_type
- │   │   │   ├── dimm_edac_mode
- │   │   │   ├── dimm_label
- │   │   │   ├── dimm_location
- │   │   │   ├── dimm_mem_type
- │   │   │   ├── dimm_ue_count
- │   │   │   ├── size
- │   │   │   └── uevent
- │   │   ├── max_location
- │   │   ├── mc_name
- │   │   ├── reset_counters
- │   │   ├── seconds_since_reset
- │   │   ├── size_mb
- │   │   ├── ue_count
- │   │   ├── ue_noinfo_count
- │   │   └── uevent
- │   ├── mc1
- │   │   ├── ce_count
- │   │   ├── ce_noinfo_count
- │   │   ├── dimm0
- │   │   │   ├── dimm_ce_count
- │   │   │   ├── dimm_dev_type
- │   │   │   ├── dimm_edac_mode
- │   │   │   ├── dimm_label
- │   │   │   ├── dimm_location
- │   │   │   ├── dimm_mem_type
- │   │   │   ├── dimm_ue_count
- │   │   │   ├── size
- │   │   │   └── uevent
- │   │   ├── max_location
- │   │   ├── mc_name
- │   │   ├── reset_counters
- │   │   ├── seconds_since_reset
- │   │   ├── size_mb
- │   │   ├── ue_count
- │   │   ├── ue_noinfo_count
- │   │   └── uevent
- │   └── uevent
- └── uevent
-
-In the ``dimmX`` directories are EDAC control and attribute files for
-this ``X`` memory module:
-
-- ``size`` - Total memory managed by this csrow attribute file
-
- This attribute file displays, in count of megabytes, the memory
- that this csrow contains.
-
-- ``dimm_ue_count`` - Uncorrectable Errors count attribute file
-
- This attribute file displays the total count of uncorrectable
- errors that have occurred on this DIMM. If panic_on_ue is set
- this counter will not have a chance to increment, since EDAC
- will panic the system.
-
-- ``dimm_ce_count`` - Correctable Errors count attribute file
-
- This attribute file displays the total count of correctable
- errors that have occurred on this DIMM. This count is very
- important to examine. CEs provide early indications that a
- DIMM is beginning to fail. This count field should be
- monitored for non-zero values and report such information
- to the system administrator.
-
-- ``dimm_dev_type`` - Device type attribute file
-
- This attribute file will display what type of DRAM device is
- being utilized on this DIMM.
- Examples:
-
- - x1
- - x2
- - x4
- - x8
-
-- ``dimm_edac_mode`` - EDAC Mode of operation attribute file
-
- This attribute file will display what type of Error detection
- and correction is being utilized.
-
-- ``dimm_label`` - memory module label control file
-
- This control file allows this DIMM to have a label assigned
- to it. With this label in the module, when errors occur
- the output can provide the DIMM label in the system log.
- This becomes vital for panic events to isolate the
- cause of the UE event.
-
- DIMM Labels must be assigned after booting, with information
- that correctly identifies the physical slot with its
- silk screen label. This information is currently very
- motherboard specific and determination of this information
- must occur in userland at this time.
-
-- ``dimm_location`` - location of the memory module
-
- The location can have up to 3 levels, and describe how the
- memory controller identifies the location of a memory module.
- Depending on the type of memory and memory controller, it
- can be:
-
- - *csrow* and *channel* - used when the memory controller
- doesn't identify a single DIMM - e. g. in ``rankX`` dir;
- - *branch*, *channel*, *slot* - typically used on FB-DIMM memory
- controllers;
- - *channel*, *slot* - used on Nehalem and newer Intel drivers.
-
-- ``dimm_mem_type`` - Memory Type attribute file
-
- This attribute file will display what type of memory is currently
- on this csrow. Normally, either buffered or unbuffered memory.
- Examples:
-
- - Registered-DDR
- - Unbuffered-DDR
-
-.. [#f5] On some systems, the memory controller doesn't have any logic
- to identify the memory module. On such systems, the directory is called ``rankX`` and works on a similar way as the ``csrowX`` directories.
- On modern Intel memory controllers, the memory controller identifies the
- memory modules directly. On such systems, the directory is called ``dimmX``.
-
-.. [#f6] There are also some ``power`` directories and ``subsystem``
- symlinks inside the sysfs mapping that are automatically created by
- the sysfs subsystem. Currently, they serve no purpose.
-
-``csrowX`` directories
-----------------------
-
-When CONFIG_EDAC_LEGACY_SYSFS is enabled, sysfs will contain the ``csrowX``
-directories. As this API doesn't work properly for Rambus, FB-DIMMs and
-modern Intel Memory Controllers, this is being deprecated in favor of
-``dimmX`` directories.
-
-In the ``csrowX`` directories are EDAC control and attribute files for
-this ``X`` instance of csrow:
-
-
-- ``ue_count`` - Total Uncorrectable Errors count attribute file
-
- This attribute file displays the total count of uncorrectable
- errors that have occurred on this csrow. If panic_on_ue is set
- this counter will not have a chance to increment, since EDAC
- will panic the system.
-
-
-- ``ce_count`` - Total Correctable Errors count attribute file
-
- This attribute file displays the total count of correctable
- errors that have occurred on this csrow. This count is very
- important to examine. CEs provide early indications that a
- DIMM is beginning to fail. This count field should be
- monitored for non-zero values and report such information
- to the system administrator.
-
-
-- ``size_mb`` - Total memory managed by this csrow attribute file
-
- This attribute file displays, in count of megabytes, the memory
- that this csrow contains.
-
-
-- ``mem_type`` - Memory Type attribute file
-
- This attribute file will display what type of memory is currently
- on this csrow. Normally, either buffered or unbuffered memory.
- Examples:
-
- - Registered-DDR
- - Unbuffered-DDR
-
-
-- ``edac_mode`` - EDAC Mode of operation attribute file
-
- This attribute file will display what type of Error detection
- and correction is being utilized.
-
-
-- ``dev_type`` - Device type attribute file
-
- This attribute file will display what type of DRAM device is
- being utilized on this DIMM.
- Examples:
-
- - x1
- - x2
- - x4
- - x8
-
-
-- ``ch0_ce_count`` - Channel 0 CE Count attribute file
-
- This attribute file will display the count of CEs on this
- DIMM located in channel 0.
-
-
-- ``ch0_ue_count`` - Channel 0 UE Count attribute file
-
- This attribute file will display the count of UEs on this
- DIMM located in channel 0.
-
-
-- ``ch0_dimm_label`` - Channel 0 DIMM Label control file
-
-
- This control file allows this DIMM to have a label assigned
- to it. With this label in the module, when errors occur
- the output can provide the DIMM label in the system log.
- This becomes vital for panic events to isolate the
- cause of the UE event.
-
- DIMM Labels must be assigned after booting, with information
- that correctly identifies the physical slot with its
- silk screen label. This information is currently very
- motherboard specific and determination of this information
- must occur in userland at this time.
-
-
-- ``ch1_ce_count`` - Channel 1 CE Count attribute file
-
-
- This attribute file will display the count of CEs on this
- DIMM located in channel 1.
-
-
-- ``ch1_ue_count`` - Channel 1 UE Count attribute file
-
-
- This attribute file will display the count of UEs on this
- DIMM located in channel 0.
-
-
-- ``ch1_dimm_label`` - Channel 1 DIMM Label control file
-
- This control file allows this DIMM to have a label assigned
- to it. With this label in the module, when errors occur
- the output can provide the DIMM label in the system log.
- This becomes vital for panic events to isolate the
- cause of the UE event.
-
- DIMM Labels must be assigned after booting, with information
- that correctly identifies the physical slot with its
- silk screen label. This information is currently very
- motherboard specific and determination of this information
- must occur in userland at this time.
-
-
-System Logging
---------------
-
-If logging for UEs and CEs is enabled, then system logs will contain
-information indicating that errors have been detected::
-
- EDAC MC0: CE page 0x283, offset 0xce0, grain 8, syndrome 0x6ec3, row 0, channel 1 "DIMM_B1": amd76x_edac
- EDAC MC0: CE page 0x1e5, offset 0xfb0, grain 8, syndrome 0xb741, row 0, channel 1 "DIMM_B1": amd76x_edac
-
-
-The structure of the message is:
-
- +---------------------------------------+-------------+
- | Content | Example |
- +=======================================+=============+
- | The memory controller | MC0 |
- +---------------------------------------+-------------+
- | Error type | CE |
- +---------------------------------------+-------------+
- | Memory page | 0x283 |
- +---------------------------------------+-------------+
- | Offset in the page | 0xce0 |
- +---------------------------------------+-------------+
- | The byte granularity | grain 8 |
- | or resolution of the error | |
- +---------------------------------------+-------------+
- | The error syndrome | 0xb741 |
- +---------------------------------------+-------------+
- | Memory row | row 0 |
- +---------------------------------------+-------------+
- | Memory channel | channel 1 |
- +---------------------------------------+-------------+
- | DIMM label, if set prior | DIMM B1 |
- +---------------------------------------+-------------+
- | And then an optional, driver-specific | |
- | message that may have additional | |
- | information. | |
- +---------------------------------------+-------------+
-
-Both UEs and CEs with no info will lack all but memory controller, error
-type, a notice of "no info" and then an optional, driver-specific error
-message.
-
-
-PCI Bus Parity Detection
-------------------------
-
-On Header Type 00 devices, the primary status is looked at for any
-parity error regardless of whether parity is enabled on the device or
-not. (The spec indicates parity is generated in some cases). On Header
-Type 01 bridges, the secondary status register is also looked at to see
-if parity occurred on the bus on the other side of the bridge.
-
-
-Sysfs configuration
--------------------
-
-Under ``/sys/devices/system/edac/pci`` are control and attribute files as
-follows:
-
-
-- ``check_pci_parity`` - Enable/Disable PCI Parity checking control file
-
- This control file enables or disables the PCI Bus Parity scanning
- operation. Writing a 1 to this file enables the scanning. Writing
- a 0 to this file disables the scanning.
-
- Enable::
-
- echo "1" >/sys/devices/system/edac/pci/check_pci_parity
-
- Disable::
-
- echo "0" >/sys/devices/system/edac/pci/check_pci_parity
-
-
-- ``pci_parity_count`` - Parity Count
-
- This attribute file will display the number of parity errors that
- have been detected.
-
-
-Module parameters
------------------
-
-- ``edac_mc_panic_on_ue`` - Panic on UE control file
-
- An uncorrectable error will cause a machine panic. This is usually
- desirable. It is a bad idea to continue when an uncorrectable error
- occurs - it is indeterminate what was uncorrected and the operating
- system context might be so mangled that continuing will lead to further
- corruption. If the kernel has MCE configured, then EDAC will never
- notice the UE.
-
- LOAD TIME::
-
- module/kernel parameter: edac_mc_panic_on_ue=[0|1]
-
- RUN TIME::
-
- echo "1" > /sys/module/edac_core/parameters/edac_mc_panic_on_ue
-
-
-- ``edac_mc_log_ue`` - Log UE control file
-
-
- Generate kernel messages describing uncorrectable errors. These errors
- are reported through the system message log system. UE statistics
- will be accumulated even when UE logging is disabled.
-
- LOAD TIME::
-
- module/kernel parameter: edac_mc_log_ue=[0|1]
-
- RUN TIME::
-
- echo "1" > /sys/module/edac_core/parameters/edac_mc_log_ue
-
-
-- ``edac_mc_log_ce`` - Log CE control file
-
-
- Generate kernel messages describing correctable errors. These
- errors are reported through the system message log system.
- CE statistics will be accumulated even when CE logging is disabled.
-
- LOAD TIME::
-
- module/kernel parameter: edac_mc_log_ce=[0|1]
-
- RUN TIME::
-
- echo "1" > /sys/module/edac_core/parameters/edac_mc_log_ce
-
-
-- ``edac_mc_poll_msec`` - Polling period control file
-
-
- The time period, in milliseconds, for polling for error information.
- Too small a value wastes resources. Too large a value might delay
- necessary handling of errors and might loose valuable information for
- locating the error. 1000 milliseconds (once each second) is the current
- default. Systems which require all the bandwidth they can get, may
- increase this.
-
- LOAD TIME::
-
- module/kernel parameter: edac_mc_poll_msec=[0|1]
-
- RUN TIME::
-
- echo "1000" > /sys/module/edac_core/parameters/edac_mc_poll_msec
-
-
-- ``panic_on_pci_parity`` - Panic on PCI PARITY Error
-
-
- This control file enables or disables panicking when a parity
- error has been detected.
-
-
- module/kernel parameter::
-
- edac_panic_on_pci_pe=[0|1]
-
- Enable::
-
- echo "1" > /sys/module/edac_core/parameters/edac_panic_on_pci_pe
-
- Disable::
-
- echo "0" > /sys/module/edac_core/parameters/edac_panic_on_pci_pe
-
-
-
-EDAC device type
-----------------
-
-In the header file, edac_pci.h, there is a series of edac_device structures
-and APIs for the EDAC_DEVICE.
-
-User space access to an edac_device is through the sysfs interface.
-
-At the location ``/sys/devices/system/edac`` (sysfs) new edac_device devices
-will appear.
-
-There is a three level tree beneath the above ``edac`` directory. For example,
-the ``test_device_edac`` device (found at the http://bluesmoke.sourceforget.net
-website) installs itself as::
-
- /sys/devices/system/edac/test-instance
-
-in this directory are various controls, a symlink and one or more ``instance``
-directories.
-
-The standard default controls are:
-
- ============== =======================================================
- log_ce boolean to log CE events
- log_ue boolean to log UE events
- panic_on_ue boolean to ``panic`` the system if an UE is encountered
- (default off, can be set true via startup script)
- poll_msec time period between POLL cycles for events
- ============== =======================================================
-
-The test_device_edac device adds at least one of its own custom control:
-
- ============== ==================================================
- test_bits which in the current test driver does nothing but
- show how it is installed. A ported driver can
- add one or more such controls and/or attributes
- for specific uses.
- One out-of-tree driver uses controls here to allow
- for ERROR INJECTION operations to hardware
- injection registers
- ============== ==================================================
-
-The symlink points to the 'struct dev' that is registered for this edac_device.
-
-Instances
----------
-
-One or more instance directories are present. For the ``test_device_edac``
-case:
-
- +----------------+
- | test-instance0 |
- +----------------+
-
-
-In this directory there are two default counter attributes, which are totals of
-counter in deeper subdirectories.
-
- ============== ====================================
- ce_count total of CE events of subdirectories
- ue_count total of UE events of subdirectories
- ============== ====================================
-
-Blocks
-------
-
-At the lowest directory level is the ``block`` directory. There can be 0, 1
-or more blocks specified in each instance:
-
- +-------------+
- | test-block0 |
- +-------------+
-
-In this directory the default attributes are:
-
- ============== ================================================
- ce_count which is counter of CE events for this ``block``
- of hardware being monitored
- ue_count which is counter of UE events for this ``block``
- of hardware being monitored
- ============== ================================================
-
-
-The ``test_device_edac`` device adds 4 attributes and 1 control:
-
- ================== ====================================================
- test-block-bits-0 for every POLL cycle this counter
- is incremented
- test-block-bits-1 every 10 cycles, this counter is bumped once,
- and test-block-bits-0 is set to 0
- test-block-bits-2 every 100 cycles, this counter is bumped once,
- and test-block-bits-1 is set to 0
- test-block-bits-3 every 1000 cycles, this counter is bumped once,
- and test-block-bits-2 is set to 0
- ================== ====================================================
-
-
- ================== ====================================================
- reset-counters writing ANY thing to this control will
- reset all the above counters.
- ================== ====================================================
-
-
-Use of the ``test_device_edac`` driver should enable any others to create their own
-unique drivers for their hardware systems.
-
-The ``test_device_edac`` sample driver is located at the
-http://bluesmoke.sourceforge.net project site for EDAC.
-
-
-Usage of EDAC APIs on Nehalem and newer Intel CPUs
---------------------------------------------------
-
-On older Intel architectures, the memory controller was part of the North
-Bridge chipset. Nehalem, Sandy Bridge, Ivy Bridge, Haswell, Sky Lake and
-newer Intel architectures integrated an enhanced version of the memory
-controller (MC) inside the CPUs.
-
-This chapter will cover the differences of the enhanced memory controllers
-found on newer Intel CPUs, such as ``i7core_edac``, ``sb_edac`` and
-``sbx_edac`` drivers.
-
-.. note::
-
- The Xeon E7 processor families use a separate chip for the memory
- controller, called Intel Scalable Memory Buffer. This section doesn't
- apply for such families.
-
-1) There is one Memory Controller per Quick Patch Interconnect
- (QPI). At the driver, the term "socket" means one QPI. This is
- associated with a physical CPU socket.
-
- Each MC have 3 physical read channels, 3 physical write channels and
- 3 logic channels. The driver currently sees it as just 3 channels.
- Each channel can have up to 3 DIMMs.
-
- The minimum known unity is DIMMs. There are no information about csrows.
- As EDAC API maps the minimum unity is csrows, the driver sequentially
- maps channel/DIMM into different csrows.
-
- For example, supposing the following layout::
-
- Ch0 phy rd0, wr0 (0x063f4031): 2 ranks, UDIMMs
- dimm 0 1024 Mb offset: 0, bank: 8, rank: 1, row: 0x4000, col: 0x400
- dimm 1 1024 Mb offset: 4, bank: 8, rank: 1, row: 0x4000, col: 0x400
- Ch1 phy rd1, wr1 (0x063f4031): 2 ranks, UDIMMs
- dimm 0 1024 Mb offset: 0, bank: 8, rank: 1, row: 0x4000, col: 0x400
- Ch2 phy rd3, wr3 (0x063f4031): 2 ranks, UDIMMs
- dimm 0 1024 Mb offset: 0, bank: 8, rank: 1, row: 0x4000, col: 0x400
-
- The driver will map it as::
-
- csrow0: channel 0, dimm0
- csrow1: channel 0, dimm1
- csrow2: channel 1, dimm0
- csrow3: channel 2, dimm0
-
- exports one DIMM per csrow.
-
- Each QPI is exported as a different memory controller.
-
-2) The MC has the ability to inject errors to test drivers. The drivers
- implement this functionality via some error injection nodes:
-
- For injecting a memory error, there are some sysfs nodes, under
- ``/sys/devices/system/edac/mc/mc?/``:
-
- - ``inject_addrmatch/*``:
- Controls the error injection mask register. It is possible to specify
- several characteristics of the address to match an error code::
-
- dimm = the affected dimm. Numbers are relative to a channel;
- rank = the memory rank;
- channel = the channel that will generate an error;
- bank = the affected bank;
- page = the page address;
- column (or col) = the address column.
-
- each of the above values can be set to "any" to match any valid value.
-
- At driver init, all values are set to any.
-
- For example, to generate an error at rank 1 of dimm 2, for any channel,
- any bank, any page, any column::
-
- echo 2 >/sys/devices/system/edac/mc/mc0/inject_addrmatch/dimm
- echo 1 >/sys/devices/system/edac/mc/mc0/inject_addrmatch/rank
-
- To return to the default behaviour of matching any, you can do::
-
- echo any >/sys/devices/system/edac/mc/mc0/inject_addrmatch/dimm
- echo any >/sys/devices/system/edac/mc/mc0/inject_addrmatch/rank
-
- - ``inject_eccmask``:
- specifies what bits will have troubles,
-
- - ``inject_section``:
- specifies what ECC cache section will get the error::
-
- 3 for both
- 2 for the highest
- 1 for the lowest
-
- - ``inject_type``:
- specifies the type of error, being a combination of the following bits::
-
- bit 0 - repeat
- bit 1 - ecc
- bit 2 - parity
-
- - ``inject_enable``:
- starts the error generation when something different than 0 is written.
-
- All inject vars can be read. root permission is needed for write.
-
- Datasheet states that the error will only be generated after a write on an
- address that matches inject_addrmatch. It seems, however, that reading will
- also produce an error.
-
- For example, the following code will generate an error for any write access
- at socket 0, on any DIMM/address on channel 2::
-
- echo 2 >/sys/devices/system/edac/mc/mc0/inject_addrmatch/channel
- echo 2 >/sys/devices/system/edac/mc/mc0/inject_type
- echo 64 >/sys/devices/system/edac/mc/mc0/inject_eccmask
- echo 3 >/sys/devices/system/edac/mc/mc0/inject_section
- echo 1 >/sys/devices/system/edac/mc/mc0/inject_enable
- dd if=/dev/mem of=/dev/null seek=16k bs=4k count=1 >& /dev/null
-
- For socket 1, it is needed to replace "mc0" by "mc1" at the above
- commands.
-
- The generated error message will look like::
-
- EDAC MC0: UE row 0, channel-a= 0 channel-b= 0 labels "-": NON_FATAL (addr = 0x0075b980, socket=0, Dimm=0, Channel=2, syndrome=0x00000040, count=1, Err=8c0000400001009f:4000080482 (read error: read ECC error))
-
-3) Corrected Error memory register counters
-
- Those newer MCs have some registers to count memory errors. The driver
- uses those registers to report Corrected Errors on devices with Registered
- DIMMs.
-
- However, those counters don't work with Unregistered DIMM. As the chipset
- offers some counters that also work with UDIMMs (but with a worse level of
- granularity than the default ones), the driver exposes those registers for
- UDIMM memories.
-
- They can be read by looking at the contents of ``all_channel_counts/``::
-
- $ for i in /sys/devices/system/edac/mc/mc0/all_channel_counts/*; do echo $i; cat $i; done
- /sys/devices/system/edac/mc/mc0/all_channel_counts/udimm0
- 0
- /sys/devices/system/edac/mc/mc0/all_channel_counts/udimm1
- 0
- /sys/devices/system/edac/mc/mc0/all_channel_counts/udimm2
- 0
-
- What happens here is that errors on different csrows, but at the same
- dimm number will increment the same counter.
- So, in this memory mapping::
-
- csrow0: channel 0, dimm0
- csrow1: channel 0, dimm1
- csrow2: channel 1, dimm0
- csrow3: channel 2, dimm0
-
- The hardware will increment udimm0 for an error at the first dimm at either
- csrow0, csrow2 or csrow3;
-
- The hardware will increment udimm1 for an error at the second dimm at either
- csrow0, csrow2 or csrow3;
-
- The hardware will increment udimm2 for an error at the third dimm at either
- csrow0, csrow2 or csrow3;
-
-4) Standard error counters
-
- The standard error counters are generated when an mcelog error is received
- by the driver. Since, with UDIMM, this is counted by software, it is
- possible that some errors could be lost. With RDIMM's, they display the
- contents of the registers
-
-Reference documents used on ``amd64_edac``
-------------------------------------------
-
-``amd64_edac`` module is based on the following documents
-(available from http://support.amd.com/en-us/search/tech-docs):
-
-1. :Title: BIOS and Kernel Developer's Guide for AMD Athlon 64 and AMD
- Opteron Processors
- :AMD publication #: 26094
- :Revision: 3.26
- :Link: http://support.amd.com/TechDocs/26094.PDF
-
-2. :Title: BIOS and Kernel Developer's Guide for AMD NPT Family 0Fh
- Processors
- :AMD publication #: 32559
- :Revision: 3.00
- :Issue Date: May 2006
- :Link: http://support.amd.com/TechDocs/32559.pdf
-
-3. :Title: BIOS and Kernel Developer's Guide (BKDG) For AMD Family 10h
- Processors
- :AMD publication #: 31116
- :Revision: 3.00
- :Issue Date: September 07, 2007
- :Link: http://support.amd.com/TechDocs/31116.pdf
-
-4. :Title: BIOS and Kernel Developer's Guide (BKDG) for AMD Family 15h
- Models 30h-3Fh Processors
- :AMD publication #: 49125
- :Revision: 3.06
- :Issue Date: 2/12/2015 (latest release)
- :Link: http://support.amd.com/TechDocs/49125_15h_Models_30h-3Fh_BKDG.pdf
-
-5. :Title: BIOS and Kernel Developer's Guide (BKDG) for AMD Family 15h
- Models 60h-6Fh Processors
- :AMD publication #: 50742
- :Revision: 3.01
- :Issue Date: 7/23/2015 (latest release)
- :Link: http://support.amd.com/TechDocs/50742_15h_Models_60h-6Fh_BKDG.pdf
-
-6. :Title: BIOS and Kernel Developer's Guide (BKDG) for AMD Family 16h
- Models 00h-0Fh Processors
- :AMD publication #: 48751
- :Revision: 3.03
- :Issue Date: 2/23/2015 (latest release)
- :Link: http://support.amd.com/TechDocs/48751_16h_bkdg.pdf
-
-Credits
-=======
-
-* Written by Doug Thompson <dougthompson@xmission.com>
-
- - 7 Dec 2005
- - 17 Jul 2007 Updated
-
-* |copy| Mauro Carvalho Chehab
-
- - 05 Aug 2009 Nehalem interface
- - 26 Oct 2016 Converted to ReST and cleanups at the Nehalem section
-
-* EDAC authors/maintainers:
-
- - Doug Thompson, Dave Jiang, Dave Peterson et al,
- - Mauro Carvalho Chehab
- - Borislav Petkov
- - original author: Thayne Harbaugh
diff --git a/Documentation/admin-guide/reporting-issues.rst b/Documentation/admin-guide/reporting-issues.rst
index 2fd5a030235a..a68e6d909274 100644
--- a/Documentation/admin-guide/reporting-issues.rst
+++ b/Documentation/admin-guide/reporting-issues.rst
@@ -41,7 +41,7 @@ If you are facing multiple issues with the Linux kernel at once, report each
separately. While writing your report, include all information relevant to the
issue, like the kernel and the distro used. In case of a regression, CC the
regressions mailing list (regressions@lists.linux.dev) to your report. Also try
-to pin-point the culprit with a bisection; if you succeed, include its
+to pinpoint the culprit with a bisection; if you succeed, include its
commit-id and CC everyone in the sign-off-by chain.
Once the report is out, answer any questions that come up and help where you
@@ -206,7 +206,7 @@ Reporting issues only occurring in older kernel version lines
This subsection is for you, if you tried the latest mainline kernel as outlined
above, but failed to reproduce your issue there; at the same time you want to
see the issue fixed in a still supported stable or longterm series or vendor
-kernels regularly rebased on those. If that the case, follow these steps:
+kernels regularly rebased on those. If that is the case, follow these steps:
* Prepare yourself for the possibility that going through the next few steps
might not get the issue solved in older releases: the fix might be too big
@@ -312,7 +312,7 @@ small modifications to a kernel based on a recent Linux version; that for
example often holds true for the mainline kernels shipped by Debian GNU/Linux
Sid or Fedora Rawhide. Some developers will also accept reports about issues
with kernels from distributions shipping the latest stable kernel, as long as
-its only slightly modified; that for example is often the case for Arch Linux,
+it's only slightly modified; that for example is often the case for Arch Linux,
regular Fedora releases, and openSUSE Tumbleweed. But keep in mind, you better
want to use a mainline Linux and avoid using a stable kernel for this
process, as outlined in the section 'Install a fresh kernel for testing' in more
@@ -611,7 +611,7 @@ better place.
How to read the MAINTAINERS file
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-To illustrate how to use the :ref:`MAINTAINERS <maintainers>` file, lets assume
+To illustrate how to use the :ref:`MAINTAINERS <maintainers>` file, let's assume
the WiFi in your Laptop suddenly misbehaves after updating the kernel. In that
case it's likely an issue in the WiFi driver. Obviously it could also be some
code it builds upon, but unless you suspect something like that stick to the
@@ -1543,7 +1543,7 @@ as well, because that will speed things up.
And note, it helps developers a great deal if you can specify the exact version
that introduced the problem. Hence if possible within a reasonable time frame,
-try to find that version using vanilla kernels. Lets assume something broke when
+try to find that version using vanilla kernels. Let's assume something broke when
your distributor released a update from Linux kernel 5.10.5 to 5.10.8. Then as
instructed above go and check the latest kernel from that version line, say
5.10.9. If it shows the problem, try a vanilla 5.10.5 to ensure that no patches
diff --git a/Documentation/admin-guide/reporting-regressions.rst b/Documentation/admin-guide/reporting-regressions.rst
index d8adccdae23f..946518355a2c 100644
--- a/Documentation/admin-guide/reporting-regressions.rst
+++ b/Documentation/admin-guide/reporting-regressions.rst
@@ -31,7 +31,7 @@ The important bits (aka "TL;DR")
Linux kernel regression tracking bot "regzbot" track the issue by specifying
when the regression started like this::
- #regzbot introduced v5.13..v5.14-rc1
+ #regzbot introduced: v5.13..v5.14-rc1
All the details on Linux kernel regressions relevant for users
@@ -42,12 +42,12 @@ The important basics
--------------------
-What is a "regression" and what is the "no regressions rule"?
+What is a "regression" and what is the "no regressions" rule?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
It's a regression if some application or practical use case running fine with
one Linux kernel works worse or not at all with a newer version compiled using a
-similar configuration. The "no regressions rule" forbids this to take place; if
+similar configuration. The "no regressions" rule forbids this to take place; if
it happens by accident, developers that caused it are expected to quickly fix
the issue.
@@ -173,7 +173,7 @@ Additional details about regressions
------------------------------------
-What is the goal of the "no regressions rule"?
+What is the goal of the "no regressions" rule?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Users should feel safe when updating kernel versions and not have to worry
@@ -199,8 +199,8 @@ Exceptions to this rule are extremely rare; in the past developers almost always
turned out to be wrong when they assumed a particular situation was warranting
an exception.
-Who ensures the "no regressions" is actually followed?
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Who ensures the "no regressions" rule is actually followed?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The subsystem maintainers should take care of that, which are watched and
supported by the tree maintainers -- e.g. Linus Torvalds for mainline and
diff --git a/Documentation/admin-guide/serial-console.rst b/Documentation/admin-guide/serial-console.rst
index a3dfc2c66e01..1609e7479249 100644
--- a/Documentation/admin-guide/serial-console.rst
+++ b/Documentation/admin-guide/serial-console.rst
@@ -78,7 +78,9 @@ If no console device is specified, the first device found capable of
acting as a system console will be used. At this time, the system
first looks for a VGA card and then for a serial port. So if you don't
have a VGA card in your system the first serial port will automatically
-become the console.
+become the console, unless the kernel is configured with the
+CONFIG_NULL_TTY_DEFAULT_CONSOLE option, then it will default to using the
+ttynull device.
You will need to create a new device to use ``/dev/console``. The official
``/dev/console`` is now character device 5,1.
diff --git a/Documentation/admin-guide/syscall-user-dispatch.rst b/Documentation/admin-guide/syscall-user-dispatch.rst
index e3cfffef5a63..c1768d9e80fa 100644
--- a/Documentation/admin-guide/syscall-user-dispatch.rst
+++ b/Documentation/admin-guide/syscall-user-dispatch.rst
@@ -53,20 +53,25 @@ following prctl:
prctl(PR_SET_SYSCALL_USER_DISPATCH, <op>, <offset>, <length>, [selector])
-<op> is either PR_SYS_DISPATCH_ON or PR_SYS_DISPATCH_OFF, to enable and
-disable the mechanism globally for that thread. When
-PR_SYS_DISPATCH_OFF is used, the other fields must be zero.
-
-[<offset>, <offset>+<length>) delimit a memory region interval
-from which syscalls are always executed directly, regardless of the
-userspace selector. This provides a fast path for the C library, which
-includes the most common syscall dispatchers in the native code
-applications, and also provides a way for the signal handler to return
+<op> is either PR_SYS_DISPATCH_EXCLUSIVE_ON/PR_SYS_DISPATCH_INCLUSIVE_ON
+or PR_SYS_DISPATCH_OFF, to enable and disable the mechanism globally for
+that thread. When PR_SYS_DISPATCH_OFF is used, the other fields must be zero.
+
+For PR_SYS_DISPATCH_EXCLUSIVE_ON [<offset>, <offset>+<length>) delimit
+a memory region interval from which syscalls are always executed directly,
+regardless of the userspace selector. This provides a fast path for the
+C library, which includes the most common syscall dispatchers in the native
+code applications, and also provides a way for the signal handler to return
without triggering a nested SIGSYS on (rt\_)sigreturn. Users of this
interface should make sure that at least the signal trampoline code is
included in this region. In addition, for syscalls that implement the
trampoline code on the vDSO, that trampoline is never intercepted.
+For PR_SYS_DISPATCH_INCLUSIVE_ON [<offset>, <offset>+<length>) delimit
+a memory region interval from which syscalls are dispatched based on
+the userspace selector. Syscalls from outside of the range are always
+executed directly.
+
[selector] is a pointer to a char-sized region in the process memory
region, that provides a quick way to enable disable syscall redirection
thread-wide, without the need to invoke the kernel directly. selector
diff --git a/Documentation/admin-guide/sysctl/fs.rst b/Documentation/admin-guide/sysctl/fs.rst
index 47499a1742bd..9b7f65c3efd8 100644
--- a/Documentation/admin-guide/sysctl/fs.rst
+++ b/Documentation/admin-guide/sysctl/fs.rst
@@ -38,6 +38,11 @@ requests. ``aio-max-nr`` allows you to change the maximum value
``aio-max-nr`` does not result in the
pre-allocation or re-sizing of any kernel data structures.
+dentry-negative
+----------------------------
+
+Policy for negative dentries. Set to 1 to always delete the dentry when a
+file is removed, and 0 to disable it. By default, this behavior is disabled.
dentry-state
------------
@@ -159,8 +164,8 @@ pipe-user-pages-soft
--------------------
Maximum total number of pages a non-privileged user may allocate for pipes
-before the pipe size gets limited to a single page. Once this limit is reached,
-new pipes will be limited to a single page in size for this user in order to
+before the pipe size gets limited to two pages. Once this limit is reached,
+new pipes will be limited to two pages in size for this user in order to
limit total memory usage, and trying to increase them using ``fcntl()`` will be
denied until usage goes below the limit again. The default value allows to
allocate up to 1024 pipes at their default size. When set to 0, no limit is
@@ -332,3 +337,38 @@ Each "watch" costs roughly 90 bytes on a 32-bit kernel, and roughly 160 bytes
on a 64-bit one.
The current default value for ``max_user_watches`` is 4% of the
available low memory, divided by the "watch" cost in bytes.
+
+5. /proc/sys/fs/fuse - Configuration options for FUSE filesystems
+=====================================================================
+
+This directory contains the following configuration options for FUSE
+filesystems:
+
+``/proc/sys/fs/fuse/max_pages_limit`` is a read/write file for
+setting/getting the maximum number of pages that can be used for servicing
+requests in FUSE.
+
+``/proc/sys/fs/fuse/default_request_timeout`` is a read/write file for
+setting/getting the default timeout (in seconds) for a fuse server to
+reply to a kernel-issued request in the event where the server did not
+specify a timeout at mount. If the server set a timeout,
+then default_request_timeout will be ignored. The default
+"default_request_timeout" is set to 0. 0 indicates no default timeout.
+The maximum value that can be set is 65535.
+
+``/proc/sys/fs/fuse/max_request_timeout`` is a read/write file for
+setting/getting the maximum timeout (in seconds) for a fuse server to
+reply to a kernel-issued request. A value greater than 0 automatically opts
+the server into a timeout that will be set to at most "max_request_timeout",
+even if the server did not specify a timeout and default_request_timeout is
+set to 0. If max_request_timeout is greater than 0 and the server set a timeout
+greater than max_request_timeout or default_request_timeout is set to a value
+greater than max_request_timeout, the system will use max_request_timeout as the
+timeout. 0 indicates no max request timeout. The maximum value that can be set
+is 65535.
+
+For timeouts, if the server does not respond to the request by the time
+the set timeout elapses, then the connection to the fuse server will be aborted.
+Please note that the timeouts are not 100% precise (eg you may set 60 seconds but
+the timeout may kick in after 70 seconds). The upper margin of error for the
+timeout is roughly FUSE_TIMEOUT_TIMER_FREQ seconds.
diff --git a/Documentation/admin-guide/sysctl/index.rst b/Documentation/admin-guide/sysctl/index.rst
index 03346f98c7b9..4dd2c9b5d752 100644
--- a/Documentation/admin-guide/sysctl/index.rst
+++ b/Documentation/admin-guide/sysctl/index.rst
@@ -66,25 +66,31 @@ This documentation is about:
=============== ===============================================================
abi/ execution domains & personalities
-debug/ <empty>
-dev/ device specific information (eg dev/cdrom/info)
+<$ARCH> tuning controls for various CPU architecture (e.g. csky, s390)
+crypto/ <undocumented>
+debug/ <undocumented>
+dev/ device specific information (e.g. dev/cdrom/info)
fs/ specific filesystems
filehandle, inode, dentry and quota tuning
binfmt_misc <Documentation/admin-guide/binfmt-misc.rst>
kernel/ global kernel info / tuning
miscellaneous stuff
+ some architecture-specific controls
+ security (LSM) stuff
net/ networking stuff, for documentation look in:
<Documentation/networking/>
proc/ <empty>
sunrpc/ SUN Remote Procedure Call (NFS)
+user/ Per user namespace limits
vm/ memory management tuning
buffer and cache management
-user/ Per user per user namespace limits
+xen/ <undocumented>
=============== ===============================================================
-These are the subdirs I have on my system. There might be more
-or other subdirs in another setup. If you see another dir, I'd
-really like to hear about it :-)
+These are the subdirs I have on my system or have been discovered by
+searching through the source code. There might be more or other subdirs
+in another setup. If you see another dir, I'd really like to hear about
+it :-)
.. toctree::
:maxdepth: 1
diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst
index 6584a1f9bfe3..239da22c4e28 100644
--- a/Documentation/admin-guide/sysctl/kernel.rst
+++ b/Documentation/admin-guide/sysctl/kernel.rst
@@ -177,6 +177,7 @@ core_pattern
%E executable path
%c maximum size of core file by resource limit RLIMIT_CORE
%C CPU the task ran on
+ %F pidfd number
%<OTHER> both are dropped
======== ==========================================
@@ -212,6 +213,17 @@ pid>/``).
This value defaults to 0.
+core_sort_vma
+=============
+
+The default coredump writes VMAs in address order. By setting
+``core_sort_vma`` to 1, VMAs will be written from smallest size
+to largest size. This is known to break at least elfutils, but
+can be handy when dealing with very large (and truncated)
+coredumps where the more useful debugging details are included
+in the smaller VMAs.
+
+
core_uses_pid
=============
@@ -296,12 +308,30 @@ kernel panic). This will output the contents of the ftrace buffers to
the console. This is very useful for capturing traces that lead to
crashes and outputting them to a serial console.
-= ===================================================
-0 Disabled (default).
-1 Dump buffers of all CPUs.
-2 Dump the buffer of the CPU that triggered the oops.
-= ===================================================
+======================= ===========================================
+0 Disabled (default).
+1 Dump buffers of all CPUs.
+2(orig_cpu) Dump the buffer of the CPU that triggered the
+ oops.
+<instance> Dump the specific instance buffer on all CPUs.
+<instance>=2(orig_cpu) Dump the specific instance buffer on the CPU
+ that triggered the oops.
+======================= ===========================================
+
+Multiple instance dump is also supported, and instances are separated
+by commas. If global buffer also needs to be dumped, please specify
+the dump mode (1/2/orig_cpu) first for global buffer.
+
+So for example to dump "foo" and "bar" instance buffer on all CPUs,
+user can::
+
+ echo "foo,bar" > /proc/sys/kernel/ftrace_dump_on_oops
+
+To dump global buffer and "foo" instance buffer on all
+CPUs along with the "bar" instance buffer on CPU that triggered the
+oops, user can::
+ echo "1,foo,bar=2" > /proc/sys/kernel/ftrace_dump_on_oops
ftrace_enabled, stack_tracer_enabled
====================================
@@ -367,13 +397,14 @@ a hung task is detected.
hung_task_panic
===============
-Controls the kernel's behavior when a hung task is detected.
+When set to a non-zero value, a kernel panic will be triggered if the
+number of hung tasks found during a single scan reaches this value.
This file shows up if ``CONFIG_DETECT_HUNG_TASK`` is enabled.
-= =================================================
+= =======================================================
0 Continue operation. This is the default behavior.
-1 Panic immediately.
-= =================================================
+N Panic when N hung tasks are found during a single scan.
+= =======================================================
hung_task_check_count
@@ -383,6 +414,20 @@ The upper bound on the number of tasks that are checked.
This file shows up if ``CONFIG_DETECT_HUNG_TASK`` is enabled.
+hung_task_detect_count
+======================
+
+Indicates the total number of tasks that have been detected as hung since
+the system boot.
+
+This file shows up if ``CONFIG_DETECT_HUNG_TASK`` is enabled.
+
+hung_task_sys_info
+==================
+A comma separated list of extra system information to be dumped when
+hung task is detected, for example, "tasks,mem,timers,locks,...".
+Refer 'panic_sys_info' section below for more details.
+
hung_task_timeout_secs
======================
@@ -436,7 +481,7 @@ ignore-unaligned-usertrap
On architectures where unaligned accesses cause traps, and where this
feature is supported (``CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN``;
-currently, ``arc`` and ``loongarch``), controls whether all
+currently, ``arc``, ``parisc`` and ``loongarch``), controls whether all
unaligned traps are logged.
= =============================================================
@@ -476,6 +521,15 @@ default), only processes with the CAP_SYS_ADMIN capability may create
io_uring instances.
+kernel_sys_info
+===============
+A comma separated list of extra system information to be dumped when
+soft/hard lockup is detected, for example, "tasks,mem,timers,locks,...".
+Refer 'panic_sys_info' section below for more details.
+
+It serves as the default kernel control knob, which will take effect
+when a kernel module calls sys_info() with parameter==0.
+
kexec_load_disabled
===================
@@ -537,6 +591,11 @@ if leaking kernel pointer values to unprivileged users is a concern.
When ``kptr_restrict`` is set to 2, kernel pointers printed using
%pK will be replaced with 0s regardless of privileges.
+softlockup_sys_info & hardlockup_sys_info
+=========================================
+A comma separated list of extra system information to be dumped when
+soft/hard lockup is detected, for example, "tasks,mem,timers,locks,...".
+Refer 'panic_sys_info' section below for more details.
modprobe
========
@@ -594,6 +653,9 @@ default (``MSGMNB``).
``msgmni`` is the maximum number of IPC queues. 32000 by default
(``MSGMNI``).
+All of these parameters are set per ipc namespace. The maximum number of bytes
+in POSIX message queues is limited by ``RLIMIT_MSGQUEUE``. This limit is
+respected hierarchically in the each user namespace.
msg_next_id, sem_next_id, and shm_next_id (System V IPC)
========================================================
@@ -848,8 +910,9 @@ bit 1 print system memory info
bit 2 print timer info
bit 3 print locks info if ``CONFIG_LOCKDEP`` is on
bit 4 print ftrace buffer
-bit 5 print all printk messages in buffer
+bit 5 replay all kernel messages on consoles at the end of panic
bit 6 print all CPUs backtrace (if available in the arch)
+bit 7 print only tasks in uninterruptible (blocked) state
===== ============================================
So for example to print tasks and memory info on panic, user can::
@@ -857,6 +920,24 @@ So for example to print tasks and memory info on panic, user can::
echo 3 > /proc/sys/kernel/panic_print
+panic_sys_info
+==============
+
+A comma separated list of extra information to be dumped on panic,
+for example, "tasks,mem,timers,...". It is a human readable alternative
+to 'panic_print'. Possible values are:
+
+============= ===================================================
+tasks print all tasks info
+mem print system memory info
+timers print timers info
+locks print locks info if CONFIG_LOCKDEP is on
+ftrace print ftrace buffer
+all_bt print all CPUs backtrace (if available in the arch)
+blocked_tasks print only tasks in uninterruptible (blocked) state
+============= ===================================================
+
+
panic_on_rcu_stall
==================
@@ -972,30 +1053,26 @@ perf_user_access (arm64 and riscv only)
Controls user space access for reading perf event counters.
-arm64
-=====
-
-The default value is 0 (access disabled).
+* for arm64
+ The default value is 0 (access disabled).
-When set to 1, user space can read performance monitor counter registers
-directly.
+ When set to 1, user space can read performance monitor counter registers
+ directly.
-See Documentation/arch/arm64/perf.rst for more information.
-
-riscv
-=====
+ See Documentation/arch/arm64/perf.rst for more information.
-When set to 0, user space access is disabled.
+* for riscv
+ When set to 0, user space access is disabled.
-The default value is 1, user space can read performance monitor counter
-registers through perf, any direct access without perf intervention will trigger
-an illegal instruction.
+ The default value is 1, user space can read performance monitor counter
+ registers through perf, any direct access without perf intervention will trigger
+ an illegal instruction.
-When set to 2, which enables legacy mode (user space has direct access to cycle
-and insret CSRs only). Note that this legacy value is deprecated and will be
-removed once all user space applications are fixed.
+ When set to 2, which enables legacy mode (user space has direct access to cycle
+ and insret CSRs only). Note that this legacy value is deprecated and will be
+ removed once all user space applications are fixed.
-Note that the time CSR is always directly accessible to all modes.
+ Note that the time CSR is always directly accessible to all modes.
pid_max
=======
@@ -1068,7 +1145,8 @@ printk_ratelimit_burst
While long term we enforce one message per `printk_ratelimit`_
seconds, we do allow a burst of messages to pass through.
``printk_ratelimit_burst`` specifies the number of messages we can
-send before ratelimiting kicks in.
+send before ratelimiting kicks in. After `printk_ratelimit`_ seconds
+have elapsed, another burst of messages may be sent.
The default value is 10 messages.
@@ -1274,15 +1352,20 @@ are doing anyway :)
shmall
======
-This parameter sets the total amount of shared memory pages that
-can be used system wide. Hence, ``shmall`` should always be at least
-``ceil(shmmax/PAGE_SIZE)``.
+This parameter sets the total amount of shared memory pages that can be used
+inside ipc namespace. The shared memory pages counting occurs for each ipc
+namespace separately and is not inherited. Hence, ``shmall`` should always be at
+least ``ceil(shmmax/PAGE_SIZE)``.
If you are not sure what the default ``PAGE_SIZE`` is on your Linux
system, you can run the following command::
# getconf PAGE_SIZE
+To reduce or disable the ability to allocate shared memory, you must create a
+new ipc namespace, set this parameter to the required value and prohibit the
+creation of a new ipc namespace in the current user namespace or cgroups can
+be used.
shmmax
======
@@ -1418,7 +1501,7 @@ stack_erasing
=============
This parameter can be used to control kernel stack erasing at the end
-of syscalls for kernels built with ``CONFIG_GCC_PLUGIN_STACKLEAK``.
+of syscalls for kernels built with ``CONFIG_KSTACK_ERASE``.
That erasing reduces the information which kernel stack leak bugs
can reveal and blocks some uninitialized stack variable attacks.
@@ -1426,7 +1509,7 @@ The tradeoff is the performance impact: on a single CPU system kernel
compilation sees a 1% slowdown, other systems and workloads may vary.
= ====================================================================
-0 Kernel stack erasing is disabled, STACKLEAK_METRICS are not updated.
+0 Kernel stack erasing is disabled, KSTACK_ERASE_METRICS are not updated.
1 Kernel stack erasing is enabled (default), it is performed before
returning to the userspace at the end of syscalls.
= ====================================================================
@@ -1508,6 +1591,13 @@ constant ``FUTEX_TID_MASK`` (0x3fffffff).
If a value outside of this range is written to ``threads-max`` an
``EINVAL`` error occurs.
+timer_migration
+===============
+
+When set to a non-zero value, attempt to migrate timers away from idle cpus to
+allow them to remain in low power states longer.
+
+Default is set (1).
traceoff_on_warning
===================
diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst
index 396091651955..369a738a6819 100644
--- a/Documentation/admin-guide/sysctl/net.rst
+++ b/Documentation/admin-guide/sysctl/net.rst
@@ -72,6 +72,7 @@ two flavors of JITs, the newer eBPF JIT currently supported on:
- riscv64
- riscv32
- loongarch64
+ - arc
And the older cBPF JIT supported on the following archs:
@@ -206,6 +207,19 @@ Will increase power usage.
Default: 0 (off)
+mem_pcpu_rsv
+------------
+
+Per-cpu reserved forward alloc cache size in page units. Default 1MB per CPU.
+
+bypass_prot_mem
+---------------
+
+Skip charging socket buffers to the global per-protocol memory
+accounting controlled by net.ipv4.tcp_mem, net.ipv4.udp_mem, etc.
+
+Default: 0 (off)
+
rmem_default
------------
@@ -216,6 +230,8 @@ rmem_max
The maximum receive socket buffer size in bytes.
+Default: 4194304
+
rps_default_mask
----------------
@@ -241,6 +257,8 @@ wmem_max
The maximum send socket buffer size in bytes.
+Default: 4194304
+
message_burst and message_cost
------------------------------
@@ -337,9 +355,9 @@ skb_defer_max
-------------
Max size (in skbs) of the per-cpu list of skbs being freed
-by the cpu which allocated them. Used by TCP stack so far.
+by the cpu which allocated them.
-Default: 64
+Default: 128
optmem_max
----------
@@ -396,6 +414,23 @@ to SOCK_TXREHASH_DEFAULT (i. e. not overridden by setsockopt).
If set to 1 (default), hash rethink is performed on listening socket.
If set to 0, hash rethink is not performed.
+txq_reselection_ms
+------------------
+
+Controls how often (in ms) a busy connected flow can select another tx queue.
+
+A resection is desirable when/if user thread has migrated and XPS
+would select a different queue. Same can occur without XPS
+if the flow hash has changed.
+
+But switching txq can introduce reorders, especially if the
+old queue is under high pressure. Modern TCP stacks deal
+well with reorders if they happen not too often.
+
+To disable this feature, set the value to 0.
+
+Default : 1000
+
gro_normal_batch
----------------
diff --git a/Documentation/admin-guide/sysctl/vm.rst b/Documentation/admin-guide/sysctl/vm.rst
index c59889de122b..4d71211fdad8 100644
--- a/Documentation/admin-guide/sysctl/vm.rst
+++ b/Documentation/admin-guide/sysctl/vm.rst
@@ -28,6 +28,7 @@ Currently, these files are in /proc/sys/vm:
- compact_memory
- compaction_proactiveness
- compact_unevictable_allowed
+- defrag_mode
- dirty_background_bytes
- dirty_background_ratio
- dirty_bytes
@@ -36,6 +37,7 @@ Currently, these files are in /proc/sys/vm:
- dirtytime_expire_seconds
- dirty_writeback_centisecs
- drop_caches
+- enable_soft_offline
- extfrag_threshold
- highmem_is_dirtyable
- hugetlb_shm_group
@@ -43,6 +45,7 @@ Currently, these files are in /proc/sys/vm:
- legacy_va_layout
- lowmem_reserve_ratio
- max_map_count
+- mem_profiling (only if CONFIG_MEM_ALLOC_PROFILING=y)
- memory_failure_early_kill
- memory_failure_recovery
- min_free_kbytes
@@ -72,6 +75,7 @@ Currently, these files are in /proc/sys/vm:
- unprivileged_userfaultfd
- user_reserve_kbytes
- vfs_cache_pressure
+- vfs_cache_pressure_denom
- watermark_boost_factor
- watermark_scale_factor
- zone_reclaim_mode
@@ -128,6 +132,12 @@ to latency spikes in unsuspecting applications. The kernel employs
various heuristics to avoid wasting CPU cycles if it detects that
proactive compaction is not being effective.
+Setting the value above 80 will, in addition to lowering the acceptable level
+of fragmentation, make the compaction code more sensitive to increases in
+fragmentation, i.e. compaction will trigger more often, but reduce
+fragmentation by a smaller amount.
+This makes the fragmentation level more stable over time.
+
Be careful when setting it to extreme values like 100, as that may
cause excessive background compaction activity.
@@ -143,6 +153,14 @@ On CONFIG_PREEMPT_RT the default value is 0 in order to avoid a page fault, due
to compaction, which would block the task from becoming active until the fault
is resolved.
+defrag_mode
+===========
+
+When set to 1, the page allocator tries harder to avoid fragmentation
+and maintain the ability to produce huge pages / higher-order pages.
+
+It is recommended to enable this right after boot, as fragmentation,
+once it occurred, can be long-lasting or even permanent.
dirty_background_bytes
======================
@@ -266,6 +284,43 @@ used::
These are informational only. They do not mean that anything is wrong
with your system. To disable them, echo 4 (bit 2) into drop_caches.
+enable_soft_offline
+===================
+Correctable memory errors are very common on servers. Soft-offline is kernel's
+solution for memory pages having (excessive) corrected memory errors.
+
+For different types of page, soft-offline has different behaviors / costs.
+
+- For a raw error page, soft-offline migrates the in-use page's content to
+ a new raw page.
+
+- For a page that is part of a transparent hugepage, soft-offline splits the
+ transparent hugepage into raw pages, then migrates only the raw error page.
+ As a result, user is transparently backed by 1 less hugepage, impacting
+ memory access performance.
+
+- For a page that is part of a HugeTLB hugepage, soft-offline first migrates
+ the entire HugeTLB hugepage, during which a free hugepage will be consumed
+ as migration target. Then the original hugepage is dissolved into raw
+ pages without compensation, reducing the capacity of the HugeTLB pool by 1.
+
+It is user's call to choose between reliability (staying away from fragile
+physical memory) vs performance / capacity implications in transparent and
+HugeTLB cases.
+
+For all architectures, enable_soft_offline controls whether to soft offline
+memory pages. When set to 1, kernel attempts to soft offline the pages
+whenever it thinks needed. When set to 0, kernel returns EOPNOTSUPP to
+the request to soft offline the pages. Its default value is 1.
+
+It is worth mentioning that after setting enable_soft_offline to 0, the
+following requests to soft offline pages will not be performed:
+
+- Request to soft offline pages from RAS Correctable Errors Collector.
+
+- On ARM, the request to soft offline pages from GHES driver.
+
+- On PARISC, the request to soft offline pages from Page Deallocation Table.
extfrag_threshold
=================
@@ -410,8 +465,8 @@ The minimum value is 1 (1/1 -> 100%). The value less than 1 completely
disables protection of the pages.
-max_map_count:
-==============
+max_map_count
+=============
This file contains the maximum number of memory map areas a process
may have. Memory map areas are used as a side-effect of calling
@@ -425,8 +480,23 @@ e.g., up to one or two maps per allocation.
The default value is 65530.
-memory_failure_early_kill:
-==========================
+mem_profiling
+==============
+
+Enable memory profiling (when CONFIG_MEM_ALLOC_PROFILING=y)
+
+1: Enable memory profiling.
+
+0: Disable memory profiling.
+
+Enabling memory profiling introduces a small performance overhead for all
+memory allocations.
+
+The default value depends on CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT.
+
+
+memory_failure_early_kill
+=========================
Control how to kill processes when uncorrected memory error (typically
a 2bit error in a memory module) is detected in the background by hardware
@@ -954,19 +1024,28 @@ vfs_cache_pressure
This percentage value controls the tendency of the kernel to reclaim
the memory which is used for caching of directory and inode objects.
-At the default value of vfs_cache_pressure=100 the kernel will attempt to
-reclaim dentries and inodes at a "fair" rate with respect to pagecache and
-swapcache reclaim. Decreasing vfs_cache_pressure causes the kernel to prefer
-to retain dentry and inode caches. When vfs_cache_pressure=0, the kernel will
-never reclaim dentries and inodes due to memory pressure and this can easily
-lead to out-of-memory conditions. Increasing vfs_cache_pressure beyond 100
-causes the kernel to prefer to reclaim dentries and inodes.
+At the default value of vfs_cache_pressure=vfs_cache_pressure_denom the kernel
+will attempt to reclaim dentries and inodes at a "fair" rate with respect to
+pagecache and swapcache reclaim. Decreasing vfs_cache_pressure causes the
+kernel to prefer to retain dentry and inode caches. When vfs_cache_pressure=0,
+the kernel will never reclaim dentries and inodes due to memory pressure and
+this can easily lead to out-of-memory conditions. Increasing vfs_cache_pressure
+beyond vfs_cache_pressure_denom causes the kernel to prefer to reclaim dentries
+and inodes.
-Increasing vfs_cache_pressure significantly beyond 100 may have negative
-performance impact. Reclaim code needs to take various locks to find freeable
-directory and inode objects. With vfs_cache_pressure=1000, it will look for
-ten times more freeable objects than there are.
+Increasing vfs_cache_pressure significantly beyond vfs_cache_pressure_denom may
+have negative performance impact. Reclaim code needs to take various locks to
+find freeable directory and inode objects. When vfs_cache_pressure equals
+(10 * vfs_cache_pressure_denom), it will look for ten times more freeable
+objects than there are.
+
+Note: This setting should always be used together with vfs_cache_pressure_denom.
+
+vfs_cache_pressure_denom
+========================
+Defaults to 100 (minimum allowed value). Requires corresponding
+vfs_cache_pressure setting to take effect.
watermark_boost_factor
======================
diff --git a/Documentation/admin-guide/sysrq.rst b/Documentation/admin-guide/sysrq.rst
index 2f2e5bd440f9..9c7aa817adc7 100644
--- a/Documentation/admin-guide/sysrq.rst
+++ b/Documentation/admin-guide/sysrq.rst
@@ -49,26 +49,26 @@ How do I use the magic SysRq key?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
On x86
- You press the key combo :kbd:`ALT-SysRq-<command key>`.
+ You press the key combo `ALT-SysRq-<command key>`.
.. note::
Some
keyboards may not have a key labeled 'SysRq'. The 'SysRq' key is
also known as the 'Print Screen' key. Also some keyboards cannot
handle so many keys being pressed at the same time, so you might
- have better luck with press :kbd:`Alt`, press :kbd:`SysRq`,
- release :kbd:`SysRq`, press :kbd:`<command key>`, release everything.
+ have better luck with press `Alt`, press `SysRq`,
+ release `SysRq`, press `<command key>`, release everything.
On SPARC
- You press :kbd:`ALT-STOP-<command key>`, I believe.
+ You press `ALT-STOP-<command key>`, I believe.
On the serial console (PC style standard serial ports only)
You send a ``BREAK``, then within 5 seconds a command key. Sending
``BREAK`` twice is interpreted as a normal BREAK.
On PowerPC
- Press :kbd:`ALT - Print Screen` (or :kbd:`F13`) - :kbd:`<command key>`.
- :kbd:`Print Screen` (or :kbd:`F13`) - :kbd:`<command key>` may suffice.
+ Press `ALT - Print Screen` (or `F13`) - `<command key>`.
+ `Print Screen` (or `F13`) - `<command key>` may suffice.
On other
If you know of the key combos for other architectures, please
@@ -88,7 +88,7 @@ On all
echo _reisub > /proc/sysrq-trigger
-The :kbd:`<command key>` is case sensitive.
+The `<command key>` is case sensitive.
What are the 'command' keys?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -161,6 +161,8 @@ Command Function
will be printed to your console. (``0``, for example would make
it so that only emergency messages like PANICs or OOPSes would
make it to your console.)
+
+``R`` Replay the kernel log messages on consoles.
=========== ===================================================================
Okay, so what can I use them for?
@@ -211,14 +213,21 @@ processes.
"just thaw ``it(j)``" is useful if your system becomes unresponsive due to a
frozen (probably root) filesystem via the FIFREEZE ioctl.
+``Replay logs(R)`` is useful to view the kernel log messages when system is hung
+or you are not able to use dmesg command to view the messages in printk buffer.
+User may have to press the key combination multiple times if console system is
+busy. If it is completely locked up, then messages won't be printed. Output
+messages depend on current console loglevel, which can be modified using
+sysrq[0-9] (see above).
+
Sometimes SysRq seems to get 'stuck' after using it, what can I do?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
When this happens, try tapping shift, alt and control on both sides of the
keyboard, and hitting an invalid sysrq sequence again. (i.e., something like
-:kbd:`alt-sysrq-z`).
+`alt-sysrq-z`).
-Switching to another virtual console (:kbd:`ALT+Fn`) and then back again
+Switching to another virtual console (`ALT+Fn`) and then back again
should also help.
I hit SysRq, but nothing seems to happen, what's wrong?
@@ -281,7 +290,7 @@ exception the header line from the sysrq command is passed to all console
consumers as if the current loglevel was maximum. If only the header
is emitted it is almost certain that the kernel loglevel is too low.
Should you require the output on the console channel then you will need
-to temporarily up the console loglevel using :kbd:`alt-sysrq-8` or::
+to temporarily up the console loglevel using `alt-sysrq-8` or::
echo 8 > /proc/sysrq-trigger
diff --git a/Documentation/admin-guide/tainted-kernels.rst b/Documentation/admin-guide/tainted-kernels.rst
index 92a8a07f5c43..ed1f8f1e86c5 100644
--- a/Documentation/admin-guide/tainted-kernels.rst
+++ b/Documentation/admin-guide/tainted-kernels.rst
@@ -34,7 +34,7 @@ name of the command ('Comm:') that triggered the event::
You'll find a 'Not tainted: ' there if the kernel was not tainted at the
time of the event; if it was, then it will print 'Tainted: ' and characters
-either letters or blanks. In above example it looks like this::
+either letters or blanks. In the example above it looks like this::
Tainted: P W O
@@ -52,7 +52,7 @@ At runtime, you can query the tainted state by reading
tainted; any other number indicates the reasons why it is. The easiest way to
decode that number is the script ``tools/debugging/kernel-chktaint``, which your
distribution might ship as part of a package called ``linux-tools`` or
-``kernel-tools``; if it doesn't you can download the script from
+``kernel-tools``; if it doesn't, you can download the script from
`git.kernel.org <https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/tools/debugging/kernel-chktaint>`_
and execute it with ``sh kernel-chktaint``, which would print something like
this on the machine that had the statements in the logs that were quoted earlier::
@@ -101,6 +101,7 @@ Bit Log Number Reason that got the kernel tainted
16 _/X 65536 auxiliary taint, defined for and used by distros
17 _/T 131072 kernel was built with the struct randomization plugin
18 _/N 262144 an in-kernel test has been run
+ 19 _/J 524288 userspace used a mutating debug operation in fwctl
=== === ====== ========================================================
Note: The character ``_`` is representing a blank in this table to make reading
@@ -182,3 +183,9 @@ More detailed explanation for tainting
produce extremely unusual kernel structure layouts (even performance
pathological ones), which is important to know when debugging. Set at
build time.
+
+ 18) ``N`` if an in-kernel test, such as a KUnit test, has been run.
+
+ 19) ``J`` if userspace opened /dev/fwctl/* and performed a FWTCL_RPC_DEBUG_WRITE
+ to use the devices debugging features. Device debugging features could
+ cause the device to malfunction in undefined ways.
diff --git a/Documentation/admin-guide/thermal/index.rst b/Documentation/admin-guide/thermal/index.rst
index 193b7b01a87d..e48bc0a1951b 100644
--- a/Documentation/admin-guide/thermal/index.rst
+++ b/Documentation/admin-guide/thermal/index.rst
@@ -6,3 +6,4 @@ Thermal Subsystem
:maxdepth: 1
intel_powerclamp
+ intel_thermal_throttle
diff --git a/Documentation/admin-guide/thermal/intel_thermal_throttle.rst b/Documentation/admin-guide/thermal/intel_thermal_throttle.rst
new file mode 100644
index 000000000000..f4fbf9d5a4ec
--- /dev/null
+++ b/Documentation/admin-guide/thermal/intel_thermal_throttle.rst
@@ -0,0 +1,91 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: <isonum.txt>
+
+=======================================
+Intel thermal throttle events reporting
+=======================================
+
+:Author: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+
+Introduction
+------------
+
+Intel processors have built in automatic and adaptive thermal monitoring
+mechanisms that force the processor to reduce its power consumption in order
+to operate within predetermined temperature limits.
+
+Refer to section "THERMAL MONITORING AND PROTECTION" in the "Intel® 64 and
+IA-32 Architectures Software Developer’s Manual Volume 3 (3A, 3B, 3C, & 3D):
+System Programming Guide" for more details.
+
+In general, there are two mechanisms to control the core temperature of the
+processor. They are called "Thermal Monitor 1 (TM1) and Thermal Monitor 2 (TM2)".
+
+The status of the temperature sensor that triggers the thermal monitor (TM1/TM2)
+is indicated through the "thermal status flag" and "thermal status log flag" in
+MSR_IA32_THERM_STATUS for core level and MSR_IA32_PACKAGE_THERM_STATUS for
+package level.
+
+Thermal Status flag, bit 0 — When set, indicates that the processor core
+temperature is currently at the trip temperature of the thermal monitor and that
+the processor power consumption is being reduced via either TM1 or TM2, depending
+on which is enabled. When clear, the flag indicates that the core temperature is
+below the thermal monitor trip temperature. This flag is read only.
+
+Thermal Status Log flag, bit 1 — When set, indicates that the thermal sensor has
+tripped since the last power-up or reset or since the last time that software
+cleared this flag. This flag is a sticky bit; once set it remains set until
+cleared by software or until a power-up or reset of the processor. The default
+state is clear.
+
+It is possible that when user reads MSR_IA32_THERM_STATUS or
+MSR_IA32_PACKAGE_THERM_STATUS, TM1/TM2 is not active. In this case,
+"Thermal Status flag" will read "0" and the "Thermal Status Log flag" will be set
+to show any previous "TM1/TM2" activation. But since it needs to be cleared by
+the software, it can't show the number of occurrences of "TM1/TM2" activations.
+
+Hence, Linux provides counters of how many times the "Thermal Status flag" was
+set. Also presents how long the "Thermal Status flag" was active in milliseconds.
+Using these counters, users can check if the performance was limited because of
+thermal events. It is recommended to read from sysfs instead of directly reading
+MSRs as the "Thermal Status Log flag" is reset by the driver to implement rate
+control.
+
+Sysfs Interface
+---------------
+
+Thermal throttling events are presented for each CPU under
+"/sys/devices/system/cpu/cpuX/thermal_throttle/", where "X" is the CPU number.
+
+All these counters are read-only. They can't be reset to 0. So, they can potentially
+overflow after reaching the maximum 64 bit unsigned integer.
+
+``core_throttle_count``
+ Shows the number of times "Thermal Status flag" changed from 0 to 1 for this
+ CPU since OS boot and thermal vector is initialized. This is a 64 bit counter.
+
+``package_throttle_count``
+ Shows the number of times "Thermal Status flag" changed from 0 to 1 for the
+ package containing this CPU since OS boot and thermal vector is initialized.
+ Package status is broadcast to all CPUs; all CPUs in the package increment
+ this count. This is a 64-bit counter.
+
+``core_throttle_max_time_ms``
+ Shows the maximum amount of time for which "Thermal Status flag" has been
+ set to 1 for this CPU at the core level since OS boot and thermal vector
+ is initialized.
+
+``package_throttle_max_time_ms``
+ Shows the maximum amount of time for which "Thermal Status flag" has been
+ set to 1 for the package containing this CPU since OS boot and thermal
+ vector is initialized.
+
+``core_throttle_total_time_ms``
+ Shows the cumulative time for which "Thermal Status flag" has been
+ set to 1 for this CPU for core level since OS boot and thermal vector
+ is initialized.
+
+``package_throttle_total_time_ms``
+ Shows the cumulative time for which "Thermal Status flag" has been set
+ to 1 for the package containing this CPU since OS boot and thermal vector
+ is initialized.
diff --git a/Documentation/admin-guide/thunderbolt.rst b/Documentation/admin-guide/thunderbolt.rst
index 2ed79f41a411..07303c1346fb 100644
--- a/Documentation/admin-guide/thunderbolt.rst
+++ b/Documentation/admin-guide/thunderbolt.rst
@@ -28,7 +28,7 @@ should be a userspace tool that handles all the low-level details, keeps
a database of the authorized devices and prompts users for new connections.
More details about the sysfs interface for Thunderbolt devices can be
-found in ``Documentation/ABI/testing/sysfs-bus-thunderbolt``.
+found in Documentation/ABI/testing/sysfs-bus-thunderbolt.
Those users who just want to connect any device without any sort of
manual work can add following line to
@@ -203,10 +203,10 @@ host controller or a device, it is important that the firmware can be
upgraded to the latest where possible bugs in it have been fixed.
Typically OEMs provide this firmware from their support site.
-There is also a central site which has links where to download firmware
-for some machines:
-
- `Thunderbolt Updates <https://thunderbolttechnology.net/updates>`_
+Currently, recommended method of updating firmware is through "fwupd" tool.
+It uses LVFS (Linux Vendor Firmware Service) portal by default to get the
+latest firmware from hardware vendors and updates connected devices if found
+compatible. For details refer to: https://github.com/fwupd/fwupd.
Before you upgrade firmware on a device, host or retimer, please make
sure it is a suitable upgrade. Failing to do that may render the device
@@ -215,18 +215,40 @@ tools!
Host NVM upgrade on Apple Macs is not supported.
-Once the NVM image has been downloaded, you need to plug in a
-Thunderbolt device so that the host controller appears. It does not
-matter which device is connected (unless you are upgrading NVM on a
-device - then you need to connect that particular device).
+Fwupd is installed by default. If you don't have it on your system, simply
+use your distro package manager to get it.
+
+To see possible updates through fwupd, you need to plug in a Thunderbolt
+device so that the host controller appears. It does not matter which
+device is connected (unless you are upgrading NVM on a device - then you
+need to connect that particular device).
Note an OEM-specific method to power the controller up ("force power") may
be available for your system in which case there is no need to plug in a
Thunderbolt device.
-After that we can write the firmware to the non-active parts of the NVM
-of the host or device. As an example here is how Intel NUC6i7KYK (Skull
-Canyon) Thunderbolt controller NVM is upgraded::
+Updating firmware using fwupd is straightforward - refer to official
+readme on fwupd github.
+
+If firmware image is written successfully, the device shortly disappears.
+Once it comes back, the driver notices it and initiates a full power
+cycle. After a while device appears again and this time it should be
+fully functional.
+
+Device of interest should display new version under "Current version"
+and "Update State: Success" in fwupd's interface.
+
+Upgrading firmware manually
+---------------------------------------------------------------
+If possible, use fwupd to updated the firmware. However, if your device OEM
+has not uploaded the firmware to LVFS, but it is available for download
+from their side, you can use method below to directly upgrade the
+firmware.
+
+Manual firmware update can be done with 'dd' tool. To update firmware
+using this method, you need to write it to the non-active parts of NVM
+of the host or device. Example on how to update Intel NUC6i7KYK
+(Skull Canyon) Thunderbolt controller NVM::
# dd if=KYK_TBT_FW_0018.bin of=/sys/bus/thunderbolt/devices/0-0/nvm_non_active0/nvmem
@@ -235,10 +257,8 @@ upgrade process as follows::
# echo 1 > /sys/bus/thunderbolt/devices/0-0/nvm_authenticate
-If no errors are returned, the host controller shortly disappears. Once
-it comes back the driver notices it and initiates a full power cycle.
-After a while the host controller appears again and this time it should
-be fully functional.
+If no errors are returned, device should behave as described in previous
+section.
We can verify that the new NVM firmware is active by running the following
commands::
@@ -296,6 +316,39 @@ information is missing.
To recover from this mode, one needs to flash a valid NVM image to the
host controller in the same way it is done in the previous chapter.
+Tunneling events
+----------------
+The driver sends ``KOBJ_CHANGE`` events to userspace when there is a
+tunneling change in the ``thunderbolt_domain``. The notification carries
+following environment variables::
+
+ TUNNEL_EVENT=<EVENT>
+ TUNNEL_DETAILS=0:12 <-> 1:20 (USB3)
+
+Possible values for ``<EVENT>`` are:
+
+ activated
+ The tunnel was activated (created).
+
+ changed
+ There is a change in this tunnel. For example bandwidth allocation was
+ changed.
+
+ deactivated
+ The tunnel was torn down.
+
+ low bandwidth
+ The tunnel is not getting optimal bandwidth.
+
+ insufficient bandwidth
+ There is not enough bandwidth for the current tunnel requirements.
+
+The ``TUNNEL_DETAILS`` is only provided if the tunnel is known. For
+example, in case of Firmware Connection Manager this is missing or does
+not provide full tunnel information. In case of Software Connection Manager
+this includes full tunnel details. The format currently matches what the
+driver uses when logging. This may change over time.
+
Networking over Thunderbolt cable
---------------------------------
Thunderbolt technology allows software communication between two hosts
@@ -325,12 +378,7 @@ Forcing power
Many OEMs include a method that can be used to force the power of a
Thunderbolt controller to an "On" state even if nothing is connected.
If supported by your machine this will be exposed by the WMI bus with
-a sysfs attribute called "force_power".
-
-For example the intel-wmi-thunderbolt driver exposes this attribute in:
- /sys/bus/wmi/devices/86CCFD48-205E-4A77-9C48-2021CBEDE341/force_power
-
- To force the power to on, write 1 to this attribute file.
- To disable force power, write 0 to this attribute file.
+a sysfs attribute called "force_power", see
+Documentation/ABI/testing/sysfs-platform-intel-wmi-thunderbolt for details.
Note: it's currently not possible to query the force power state of a platform.
diff --git a/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst b/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst
new file mode 100644
index 000000000000..d83601f2a459
--- /dev/null
+++ b/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst
@@ -0,0 +1,2222 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR CC-BY-4.0)
+.. [see the bottom of this file for redistribution information]
+
+=========================================
+How to verify bugs and bisect regressions
+=========================================
+
+This document describes how to check if some Linux kernel problem occurs in code
+currently supported by developers -- to then explain how to locate the change
+causing the issue, if it is a regression (e.g. did not happen with earlier
+versions).
+
+The text aims at people running kernels from mainstream Linux distributions on
+commodity hardware who want to report a kernel bug to the upstream Linux
+developers. Despite this intent, the instructions work just as well for users
+who are already familiar with building their own kernels: they help avoid
+mistakes occasionally made even by experienced developers.
+
+..
+ Note: if you see this note, you are reading the text's source file. You
+ might want to switch to a rendered version: it makes it a lot easier to
+ read and navigate this document -- especially when you want to look something
+ up in the reference section, then jump back to where you left off.
+..
+ Find the latest rendered version of this text here:
+ https://docs.kernel.org/admin-guide/verify-bugs-and-bisect-regressions.html
+
+The essence of the process (aka 'TL;DR')
+========================================
+
+*[If you are new to building or bisecting Linux, ignore this section and head
+over to the* ':ref:`step-by-step guide <introguide_bissbs>`' *below. It utilizes
+the same commands as this section while describing them in brief fashion. The
+steps are nevertheless easy to follow and together with accompanying entries
+in a reference section mention many alternatives, pitfalls, and additional
+aspects, all of which might be essential in your present case.]*
+
+**In case you want to check if a bug is present in code currently supported by
+developers**, execute just the *preparations* and *segment 1*; while doing so,
+consider the newest Linux kernel you regularly use to be the 'working' kernel.
+In the following example that's assumed to be 6.0, which is why its sources
+will be used to prepare the .config file.
+
+**In case you face a regression**, follow the steps at least till the end of
+*segment 2*. Then you can submit a preliminary report -- or continue with
+*segment 3*, which describes how to perform a bisection needed for a
+full-fledged regression report. In the following example 6.0.13 is assumed to be
+the 'working' kernel and 6.1.5 to be the first 'broken', which is why 6.0
+will be considered the 'good' release and used to prepare the .config file.
+
+* **Preparations**: set up everything to build your own kernels::
+
+ # * Remove any software that depends on externally maintained kernel modules
+ # or builds any automatically during bootup.
+ # * Ensure Secure Boot permits booting self-compiled Linux kernels.
+ # * If you are not already running the 'working' kernel, reboot into it.
+ # * Install compilers and everything else needed for building Linux.
+ # * Ensure to have 15 Gigabyte free space in your home directory.
+ git clone -o mainline --no-checkout \
+ https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git ~/linux/
+ cd ~/linux/
+ git remote add -t master stable \
+ https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
+ git switch --detach v6.0
+ # * Hint: if you used an existing clone, ensure no stale .config is around.
+ make olddefconfig
+ # * Ensure the former command picked the .config of the 'working' kernel.
+ # * Connect external hardware (USB keys, tokens, ...), start a VM, bring up
+ # VPNs, mount network shares, and briefly try the feature that is broken.
+ yes '' | make localmodconfig
+ ./scripts/config --set-str CONFIG_LOCALVERSION '-local'
+ ./scripts/config -e CONFIG_LOCALVERSION_AUTO
+ # * Note, when short on storage space, check the guide for an alternative:
+ ./scripts/config -d DEBUG_INFO_NONE -e KALLSYMS_ALL -e DEBUG_KERNEL \
+ -e DEBUG_INFO -e DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT -e KALLSYMS
+ # * Hint: at this point you might want to adjust the build configuration;
+ # you'll have to, if you are running Debian.
+ make olddefconfig
+ cp .config ~/kernel-config-working
+
+* **Segment 1**: build a kernel from the latest mainline codebase.
+
+ This among others checks if the problem was fixed already and which developers
+ later need to be told about the problem; in case of a regression, this rules
+ out a .config change as root of the problem.
+
+ a) Checking out latest mainline code::
+
+ cd ~/linux/
+ git switch --discard-changes --detach mainline/master
+
+ b) Build, install, and boot a kernel::
+
+ cp ~/kernel-config-working .config
+ make olddefconfig
+ make -j $(nproc --all)
+ # * Make sure there is enough disk space to hold another kernel:
+ df -h /boot/ /lib/modules/
+ # * Note: on Arch Linux, its derivatives and a few other distributions
+ # the following commands will do nothing at all or only part of the
+ # job. See the step-by-step guide for further details.
+ sudo make modules_install
+ command -v installkernel && sudo make install
+ # * Check how much space your self-built kernel actually needs, which
+ # enables you to make better estimates later:
+ du -ch /boot/*$(make -s kernelrelease)* | tail -n 1
+ du -sh /lib/modules/$(make -s kernelrelease)/
+ # * Hint: the output of the following command will help you pick the
+ # right kernel from the boot menu:
+ make -s kernelrelease | tee -a ~/kernels-built
+ reboot
+ # * Once booted, ensure you are running the kernel you just built by
+ # checking if the output of the next two commands matches:
+ tail -n 1 ~/kernels-built
+ uname -r
+ cat /proc/sys/kernel/tainted
+
+ c) Check if the problem occurs with this kernel as well.
+
+* **Segment 2**: ensure the 'good' kernel is also a 'working' kernel.
+
+ This among others verifies the trimmed .config file actually works well, as
+ bisecting with it otherwise would be a waste of time:
+
+ a) Start by checking out the sources of the 'good' version::
+
+ cd ~/linux/
+ git switch --discard-changes --detach v6.0
+
+ b) Build, install, and boot a kernel as described earlier in *segment 1,
+ section b* -- just feel free to skip the 'du' commands, as you have a rough
+ estimate already.
+
+ c) Ensure the feature that regressed with the 'broken' kernel actually works
+ with this one.
+
+* **Segment 3**: perform and validate the bisection.
+
+ a) Retrieve the sources for your 'bad' version::
+
+ git remote set-branches --add stable linux-6.1.y
+ git fetch stable
+
+ b) Initialize the bisection::
+
+ cd ~/linux/
+ git bisect start
+ git bisect good v6.0
+ git bisect bad v6.1.5
+
+ c) Build, install, and boot a kernel as described earlier in *segment 1,
+ section b*.
+
+ In case building or booting the kernel fails for unrelated reasons, run
+ ``git bisect skip``. In all other outcomes, check if the regressed feature
+ works with the newly built kernel. If it does, tell Git by executing
+ ``git bisect good``; if it does not, run ``git bisect bad`` instead.
+
+ All three commands will make Git check out another commit; then re-execute
+ this step (e.g. build, install, boot, and test a kernel to then tell Git
+ the outcome). Do so again and again until Git shows which commit broke
+ things. If you run short of disk space during this process, check the
+ section 'Complementary tasks: cleanup during and after the process'
+ below.
+
+ d) Once your finished the bisection, put a few things away::
+
+ cd ~/linux/
+ git bisect log > ~/bisect-log
+ cp .config ~/bisection-config-culprit
+ git bisect reset
+
+ e) Try to verify the bisection result::
+
+ git switch --discard-changes --detach mainline/master
+ git revert --no-edit cafec0cacaca0
+ cp ~/kernel-config-working .config
+ ./scripts/config --set-str CONFIG_LOCALVERSION '-local-cafec0cacaca0-reverted'
+
+ This is optional, as some commits are impossible to revert. But if the
+ second command worked flawlessly, build, install, and boot one more kernel
+ kernel; just this time skip the first command copying the base .config file
+ over, as that already has been taken care off.
+
+* **Complementary tasks**: cleanup during and after the process.
+
+ a) To avoid running out of disk space during a bisection, you might need to
+ remove some kernels you built earlier. You most likely want to keep those
+ you built during segment 1 and 2 around for a while, but you will most
+ likely no longer need kernels tested during the actual bisection
+ (Segment 3 c). You can list them in build order using::
+
+ ls -ltr /lib/modules/*-local*
+
+ To then for example erase a kernel that identifies itself as
+ '6.0-rc1-local-gcafec0cacaca0', use this::
+
+ sudo rm -rf /lib/modules/6.0-rc1-local-gcafec0cacaca0
+ sudo kernel-install -v remove 6.0-rc1-local-gcafec0cacaca0
+ # * Note, on some distributions kernel-install is missing
+ # or does only part of the job.
+
+ b) If you performed a bisection and successfully validated the result, feel
+ free to remove all kernels built during the actual bisection (Segment 3 c);
+ the kernels you built earlier and later you might want to keep around for
+ a week or two.
+
+* **Optional task**: test a debug patch or a proposed fix later::
+
+ git fetch mainline
+ git switch --discard-changes --detach mainline/master
+ git apply /tmp/foobars-proposed-fix-v1.patch
+ cp ~/kernel-config-working .config
+ ./scripts/config --set-str CONFIG_LOCALVERSION '-local-foobars-fix-v1'
+
+ Build, install, and boot a kernel as described in *segment 1, section b* --
+ but this time omit the first command copying the build configuration over,
+ as that has been taken care of already.
+
+.. _introguide_bissbs:
+
+Step-by-step guide on how to verify bugs and bisect regressions
+===============================================================
+
+This guide describes how to set up your own Linux kernels for investigating bugs
+or regressions you intend to report. How far you want to follow the instructions
+depends on your issue:
+
+Execute all steps till the end of *segment 1* to **verify if your kernel problem
+is present in code supported by Linux kernel developers**. If it is, you are all
+set to report the bug -- unless it did not happen with earlier kernel versions,
+as then your want to at least continue with *segment 2* to **check if the issue
+qualifies as regression** which receive priority treatment. Depending on the
+outcome you then are ready to report a bug or submit a preliminary regression
+report; instead of the latter your could also head straight on and follow
+*segment 3* to **perform a bisection** for a full-fledged regression report
+developers are obliged to act upon.
+
+ :ref:`Preparations: set up everything to build your own kernels <introprep_bissbs>`.
+
+ :ref:`Segment 1: try to reproduce the problem with the latest codebase <introlatestcheck_bissbs>`.
+
+ :ref:`Segment 2: check if the kernels you build work fine <introworkingcheck_bissbs>`.
+
+ :ref:`Segment 3: perform a bisection and validate the result <introbisect_bissbs>`.
+
+ :ref:`Complementary tasks: cleanup during and after following this guide <introclosure_bissbs>`.
+
+ :ref:`Optional tasks: test reverts, patches, or later versions <introoptional_bissbs>`.
+
+The steps in each segment illustrate the important aspects of the process, while
+a comprehensive reference section holds additional details for almost all of the
+steps. The reference section sometimes also outlines alternative approaches,
+pitfalls, as well as problems that might occur at the particular step -- and how
+to get things rolling again.
+
+For further details on how to report Linux kernel issues or regressions check
+out Documentation/admin-guide/reporting-issues.rst, which works in conjunction
+with this document. It among others explains why you need to verify bugs with
+the latest 'mainline' kernel (e.g. versions like 6.0, 6.1-rc1, or 6.1-rc6),
+even if you face a problem with a kernel from a 'stable/longterm' series
+(say 6.0.13).
+
+For users facing a regression that document also explains why sending a
+preliminary report after segment 2 might be wise, as the regression and its
+culprit might be known already. For further details on what actually qualifies
+as a regression check out Documentation/admin-guide/reporting-regressions.rst.
+
+If you run into any problems while following this guide or have ideas how to
+improve it, :ref:`please let the kernel developers know <submit_improvements_vbbr>`.
+
+.. _introprep_bissbs:
+
+Preparations: set up everything to build your own kernels
+---------------------------------------------------------
+
+The following steps lay the groundwork for all further tasks.
+
+Note: the instructions assume you are building and testing on the same
+machine; if you want to compile the kernel on another system, check
+:ref:`Build kernels on a different machine <buildhost_bis>` below.
+
+.. _backup_bissbs:
+
+* Create a fresh backup and put system repair and restore tools at hand, just
+ to be prepared for the unlikely case of something going sideways.
+
+ [:ref:`details <backup_bisref>`]
+
+.. _vanilla_bissbs:
+
+* Remove all software that depends on externally developed kernel drivers or
+ builds them automatically. That includes but is not limited to DKMS, openZFS,
+ VirtualBox, and Nvidia's graphics drivers (including the GPLed kernel module).
+
+ [:ref:`details <vanilla_bisref>`]
+
+.. _secureboot_bissbs:
+
+* On platforms with 'Secure Boot' or similar solutions, prepare everything to
+ ensure the system will permit your self-compiled kernel to boot. The
+ quickest and easiest way to achieve this on commodity x86 systems is to
+ disable such techniques in the BIOS setup utility; alternatively, remove
+ their restrictions through a process initiated by
+ ``mokutil --disable-validation``.
+
+ [:ref:`details <secureboot_bisref>`]
+
+.. _rangecheck_bissbs:
+
+* Determine the kernel versions considered 'good' and 'bad' throughout this
+ guide:
+
+ * Do you follow this guide to verify if a bug is present in the code the
+ primary developers care for? Then consider the version of the newest kernel
+ you regularly use currently as 'good' (e.g. 6.0, 6.0.13, or 6.1-rc2).
+
+ * Do you face a regression, e.g. something broke or works worse after
+ switching to a newer kernel version? In that case it depends on the version
+ range during which the problem appeared:
+
+ * Something regressed when updating from a stable/longterm release
+ (say 6.0.13) to a newer mainline series (like 6.1-rc7 or 6.1) or a
+ stable/longterm version based on one (say 6.1.5)? Then consider the
+ mainline release your working kernel is based on to be the 'good'
+ version (e.g. 6.0) and the first version to be broken as the 'bad' one
+ (e.g. 6.1-rc7, 6.1, or 6.1.5). Note, at this point it is merely assumed
+ that 6.0 is fine; this hypothesis will be checked in segment 2.
+
+ * Something regressed when switching from one mainline version (say 6.0) to
+ a later one (like 6.1-rc1) or a stable/longterm release based on it
+ (say 6.1.5)? Then regard the last working version (e.g. 6.0) as 'good' and
+ the first broken (e.g. 6.1-rc1 or 6.1.5) as 'bad'.
+
+ * Something regressed when updating within a stable/longterm series (say
+ from 6.0.13 to 6.0.15)? Then consider those versions as 'good' and 'bad'
+ (e.g. 6.0.13 and 6.0.15), as you need to bisect within that series.
+
+ *Note, do not confuse 'good' version with 'working' kernel; the latter term
+ throughout this guide will refer to the last kernel that has been working
+ fine.*
+
+ [:ref:`details <rangecheck_bisref>`]
+
+.. _bootworking_bissbs:
+
+* Boot into the 'working' kernel and briefly use the apparently broken feature.
+
+ [:ref:`details <bootworking_bisref>`]
+
+.. _diskspace_bissbs:
+
+* Ensure to have enough free space for building Linux. 15 Gigabyte in your home
+ directory should typically suffice. If you have less available, be sure to pay
+ attention to later steps about retrieving the Linux sources and handling of
+ debug symbols: both explain approaches reducing the amount of space, which
+ should allow you to master these tasks with about 4 Gigabytes free space.
+
+ [:ref:`details <diskspace_bisref>`]
+
+.. _buildrequires_bissbs:
+
+* Install all software required to build a Linux kernel. Often you will need:
+ 'bc', 'binutils' ('ld' et al.), 'bison', 'flex', 'gcc', 'git', 'openssl',
+ 'pahole', 'perl', and the development headers for 'libelf' and 'openssl'. The
+ reference section shows how to quickly install those on various popular Linux
+ distributions.
+
+ [:ref:`details <buildrequires_bisref>`]
+
+.. _sources_bissbs:
+
+* Retrieve the mainline Linux sources; then change into the directory holding
+ them, as all further commands in this guide are meant to be executed from
+ there.
+
+ *Note, the following describe how to retrieve the sources using a full
+ mainline clone, which downloads about 2,75 GByte as of early 2024. The*
+ :ref:`reference section describes two alternatives <sources_bisref>` *:
+ one downloads less than 500 MByte, the other works better with unreliable
+ internet connections.*
+
+ Execute the following command to retrieve a fresh mainline codebase while
+ preparing things to add branches for stable/longterm series later::
+
+ git clone -o mainline --no-checkout \
+ https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git ~/linux/
+ cd ~/linux/
+ git remote add -t master stable \
+ https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
+
+ [:ref:`details <sources_bisref>`]
+
+.. _stablesources_bissbs:
+
+* Is one of the versions you earlier established as 'good' or 'bad' a stable or
+ longterm release (say 6.1.5)? Then download the code for the series it belongs
+ to ('linux-6.1.y' in this example)::
+
+ git remote set-branches --add stable linux-6.1.y
+ git fetch stable
+
+.. _oldconfig_bissbs:
+
+* Start preparing a kernel build configuration (the '.config' file).
+
+ Before doing so, ensure you are still running the 'working' kernel an earlier
+ step told you to boot; if you are unsure, check the current kernelrelease
+ identifier using ``uname -r``.
+
+ Afterwards check out the source code for the version earlier established as
+ 'good'. In the following example command this is assumed to be 6.0; note that
+ the version number in this and all later Git commands needs to be prefixed
+ with a 'v'::
+
+ git switch --discard-changes --detach v6.0
+
+ Now create a build configuration file::
+
+ make olddefconfig
+
+ The kernel build scripts then will try to locate the build configuration file
+ for the running kernel and then adjust it for the needs of the kernel sources
+ you checked out. While doing so, it will print a few lines you need to check.
+
+ Look out for a line starting with '# using defaults found in'. It should be
+ followed by a path to a file in '/boot/' that contains the release identifier
+ of your currently working kernel. If the line instead continues with something
+ like 'arch/x86/configs/x86_64_defconfig', then the build infra failed to find
+ the .config file for your running kernel -- in which case you have to put one
+ there manually, as explained in the reference section.
+
+ In case you can not find such a line, look for one containing '# configuration
+ written to .config'. If that's the case you have a stale build configuration
+ lying around. Unless you intend to use it, delete it; afterwards run
+ 'make olddefconfig' again and check if it now picked up the right config file
+ as base.
+
+ [:ref:`details <oldconfig_bisref>`]
+
+.. _localmodconfig_bissbs:
+
+* Disable any kernel modules apparently superfluous for your setup. This is
+ optional, but especially wise for bisections, as it speeds up the build
+ process enormously -- at least unless the .config file picked up in the
+ previous step was already tailored to your and your hardware needs, in which
+ case you should skip this step.
+
+ To prepare the trimming, connect external hardware you occasionally use (USB
+ keys, tokens, ...), quickly start a VM, and bring up VPNs. And if you rebooted
+ since you started that guide, ensure that you tried using the feature causing
+ trouble since you started the system. Only then trim your .config::
+
+ yes '' | make localmodconfig
+
+ There is a catch to this, as the 'apparently' in initial sentence of this step
+ and the preparation instructions already hinted at:
+
+ The 'localmodconfig' target easily disables kernel modules for features only
+ used occasionally -- like modules for external peripherals not yet connected
+ since booting, virtualization software not yet utilized, VPN tunnels, and a
+ few other things. That's because some tasks rely on kernel modules Linux only
+ loads when you execute tasks like the aforementioned ones for the first time.
+
+ This drawback of localmodconfig is nothing you should lose sleep over, but
+ something to keep in mind: if something is misbehaving with the kernels built
+ during this guide, this is most likely the reason. You can reduce or nearly
+ eliminate the risk with tricks outlined in the reference section; but when
+ building a kernel just for quick testing purposes this is usually not worth
+ spending much effort on, as long as it boots and allows to properly test the
+ feature that causes trouble.
+
+ [:ref:`details <localmodconfig_bisref>`]
+
+.. _tagging_bissbs:
+
+* Ensure all the kernels you will build are clearly identifiable using a special
+ tag and a unique version number::
+
+ ./scripts/config --set-str CONFIG_LOCALVERSION '-local'
+ ./scripts/config -e CONFIG_LOCALVERSION_AUTO
+
+ [:ref:`details <tagging_bisref>`]
+
+.. _debugsymbols_bissbs:
+
+* Decide how to handle debug symbols.
+
+ In the context of this document it is often wise to enable them, as there is a
+ decent chance you will need to decode a stack trace from a 'panic', 'Oops',
+ 'warning', or 'BUG'::
+
+ ./scripts/config -d DEBUG_INFO_NONE -e KALLSYMS_ALL -e DEBUG_KERNEL \
+ -e DEBUG_INFO -e DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT -e KALLSYMS
+
+ But if you are extremely short on storage space, you might want to disable
+ debug symbols instead::
+
+ ./scripts/config -d DEBUG_INFO -d DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT \
+ -d DEBUG_INFO_DWARF4 -d DEBUG_INFO_DWARF5 -e CONFIG_DEBUG_INFO_NONE
+
+ [:ref:`details <debugsymbols_bisref>`]
+
+.. _configmods_bissbs:
+
+* Check if you may want or need to adjust some other kernel configuration
+ options:
+
+ * Are you running Debian? Then you want to avoid known problems by performing
+ additional adjustments explained in the reference section.
+
+ [:ref:`details <configmods_distros_bisref>`].
+
+ * If you want to influence other aspects of the configuration, do so now using
+ your preferred tool. Note, to use make targets like 'menuconfig' or
+ 'nconfig', you will need to install the development files of ncurses; for
+ 'xconfig' you likewise need the Qt5 or Qt6 headers.
+
+ [:ref:`details <configmods_individual_bisref>`].
+
+.. _saveconfig_bissbs:
+
+* Reprocess the .config after the latest adjustments and store it in a safe
+ place::
+
+ make olddefconfig
+ cp .config ~/kernel-config-working
+
+ [:ref:`details <saveconfig_bisref>`]
+
+.. _introlatestcheck_bissbs:
+
+Segment 1: try to reproduce the problem with the latest codebase
+----------------------------------------------------------------
+
+The following steps verify if the problem occurs with the code currently
+supported by developers. In case you face a regression, it also checks that the
+problem is not caused by some .config change, as reporting the issue then would
+be a waste of time. [:ref:`details <introlatestcheck_bisref>`]
+
+.. _checkoutmaster_bissbs:
+
+* Check out the latest Linux codebase.
+
+ * Are your 'good' and 'bad' versions from the same stable or longterm series?
+ Then check the `front page of kernel.org <https://kernel.org/>`_: if it
+ lists a release from that series without an '[EOL]' tag, checkout the series
+ latest version ('linux-6.1.y' in the following example)::
+
+ cd ~/linux/
+ git switch --discard-changes --detach stable/linux-6.1.y
+
+ Your series is unsupported, if is not listed or carrying a 'end of life'
+ tag. In that case you might want to check if a successor series (say
+ linux-6.2.y) or mainline (see next point) fix the bug.
+
+ * In all other cases, run::
+
+ cd ~/linux/
+ git switch --discard-changes --detach mainline/master
+
+ [:ref:`details <checkoutmaster_bisref>`]
+
+.. _build_bissbs:
+
+* Build the image and the modules of your first kernel using the config file you
+ prepared::
+
+ cp ~/kernel-config-working .config
+ make olddefconfig
+ make -j $(nproc --all)
+
+ If you want your kernel packaged up as deb, rpm, or tar file, see the
+ reference section for alternatives, which obviously will require other
+ steps to install as well.
+
+ [:ref:`details <build_bisref>`]
+
+.. _install_bissbs:
+
+* Install your newly built kernel.
+
+ Before doing so, consider checking if there is still enough space for it::
+
+ df -h /boot/ /lib/modules/
+
+ For now assume 150 MByte in /boot/ and 200 in /lib/modules/ will suffice; how
+ much your kernels actually require will be determined later during this guide.
+
+ Now install the kernel's modules and its image, which will be stored in
+ parallel to the your Linux distribution's kernels::
+
+ sudo make modules_install
+ command -v installkernel && sudo make install
+
+ The second command ideally will take care of three steps required at this
+ point: copying the kernel's image to /boot/, generating an initramfs, and
+ adding an entry for both to the boot loader's configuration.
+
+ Sadly some distributions (among them Arch Linux, its derivatives, and many
+ immutable Linux distributions) will perform none or only some of those tasks.
+ You therefore want to check if all of them were taken care of and manually
+ perform those that were not. The reference section provides further details on
+ that; your distribution's documentation might help, too.
+
+ Once you figured out the steps needed at this point, consider writing them
+ down: if you will build more kernels as described in segment 2 and 3, you will
+ have to perform those again after executing ``command -v installkernel [...]``.
+
+ [:ref:`details <install_bisref>`]
+
+.. _storagespace_bissbs:
+
+* In case you plan to follow this guide further, check how much storage space
+ the kernel, its modules, and other related files like the initramfs consume::
+
+ du -ch /boot/*$(make -s kernelrelease)* | tail -n 1
+ du -sh /lib/modules/$(make -s kernelrelease)/
+
+ Write down or remember those two values for later: they enable you to prevent
+ running out of disk space accidentally during a bisection.
+
+ [:ref:`details <storagespace_bisref>`]
+
+.. _kernelrelease_bissbs:
+
+* Show and store the kernelrelease identifier of the kernel you just built::
+
+ make -s kernelrelease | tee -a ~/kernels-built
+
+ Remember the identifier momentarily, as it will help you pick the right kernel
+ from the boot menu upon restarting.
+
+* Reboot into your newly built kernel. To ensure your actually started the one
+ you just built, you might want to verify if the output of these commands
+ matches::
+
+ tail -n 1 ~/kernels-built
+ uname -r
+
+.. _tainted_bissbs:
+
+* Check if the kernel marked itself as 'tainted'::
+
+ cat /proc/sys/kernel/tainted
+
+ If that command does not return '0', check the reference section, as the cause
+ for this might interfere with your testing.
+
+ [:ref:`details <tainted_bisref>`]
+
+.. _recheckbroken_bissbs:
+
+* Verify if your bug occurs with the newly built kernel. If it does not, check
+ out the instructions in the reference section to ensure nothing went sideways
+ during your tests.
+
+ [:ref:`details <recheckbroken_bisref>`]
+
+.. _recheckstablebroken_bissbs:
+
+* Did you just built a stable or longterm kernel? And were you able to reproduce
+ the regression with it? Then you should test the latest mainline codebase as
+ well, because the result determines which developers the bug must be submitted
+ to.
+
+ To prepare that test, check out current mainline::
+
+ cd ~/linux/
+ git switch --discard-changes --detach mainline/master
+
+ Now use the checked out code to build and install another kernel using the
+ commands the earlier steps already described in more detail::
+
+ cp ~/kernel-config-working .config
+ make olddefconfig
+ make -j $(nproc --all)
+ # * Check if the free space suffices holding another kernel:
+ df -h /boot/ /lib/modules/
+ sudo make modules_install
+ command -v installkernel && sudo make install
+ make -s kernelrelease | tee -a ~/kernels-built
+ reboot
+
+ Confirm you booted the kernel you intended to start and check its tainted
+ status::
+
+ tail -n 1 ~/kernels-built
+ uname -r
+ cat /proc/sys/kernel/tainted
+
+ Now verify if this kernel is showing the problem. If it does, then you need
+ to report the bug to the primary developers; if it does not, report it to the
+ stable team. See Documentation/admin-guide/reporting-issues.rst for details.
+
+ [:ref:`details <recheckstablebroken_bisref>`]
+
+Do you follow this guide to verify if a problem is present in the code
+currently supported by Linux kernel developers? Then you are done at this
+point. If you later want to remove the kernel you just built, check out
+:ref:`Complementary tasks: cleanup during and after following this guide <introclosure_bissbs>`.
+
+In case you face a regression, move on and execute at least the next segment
+as well.
+
+.. _introworkingcheck_bissbs:
+
+Segment 2: check if the kernels you build work fine
+---------------------------------------------------
+
+In case of a regression, you now want to ensure the trimmed configuration file
+you created earlier works as expected; a bisection with the .config file
+otherwise would be a waste of time. [:ref:`details <introworkingcheck_bisref>`]
+
+.. _recheckworking_bissbs:
+
+* Build your own variant of the 'working' kernel and check if the feature that
+ regressed works as expected with it.
+
+ Start by checking out the sources for the version earlier established as
+ 'good' (once again assumed to be 6.0 here)::
+
+ cd ~/linux/
+ git switch --discard-changes --detach v6.0
+
+ Now use the checked out code to configure, build, and install another kernel
+ using the commands the previous subsection explained in more detail::
+
+ cp ~/kernel-config-working .config
+ make olddefconfig
+ make -j $(nproc --all)
+ # * Check if the free space suffices holding another kernel:
+ df -h /boot/ /lib/modules/
+ sudo make modules_install
+ command -v installkernel && sudo make install
+ make -s kernelrelease | tee -a ~/kernels-built
+ reboot
+
+ When the system booted, you may want to verify once again that the
+ kernel you started is the one you just built::
+
+ tail -n 1 ~/kernels-built
+ uname -r
+
+ Now check if this kernel works as expected; if not, consult the reference
+ section for further instructions.
+
+ [:ref:`details <recheckworking_bisref>`]
+
+.. _introbisect_bissbs:
+
+Segment 3: perform the bisection and validate the result
+--------------------------------------------------------
+
+With all the preparations and precaution builds taken care of, you are now ready
+to begin the bisection. This will make you build quite a few kernels -- usually
+about 15 in case you encountered a regression when updating to a newer series
+(say from 6.0.13 to 6.1.5). But do not worry, due to the trimmed build
+configuration created earlier this works a lot faster than many people assume:
+overall on average it will often just take about 10 to 15 minutes to compile
+each kernel on commodity x86 machines.
+
+.. _bisectstart_bissbs:
+
+* Start the bisection and tell Git about the versions earlier established as
+ 'good' (6.0 in the following example command) and 'bad' (6.1.5)::
+
+ cd ~/linux/
+ git bisect start
+ git bisect good v6.0
+ git bisect bad v6.1.5
+
+ [:ref:`details <bisectstart_bisref>`]
+
+.. _bisectbuild_bissbs:
+
+* Now use the code Git checked out to build, install, and boot a kernel using
+ the commands introduced earlier::
+
+ cp ~/kernel-config-working .config
+ make olddefconfig
+ make -j $(nproc --all)
+ # * Check if the free space suffices holding another kernel:
+ df -h /boot/ /lib/modules/
+ sudo make modules_install
+ command -v installkernel && sudo make install
+ make -s kernelrelease | tee -a ~/kernels-built
+ reboot
+
+ If compilation fails for some reason, run ``git bisect skip`` and restart
+ executing the stack of commands from the beginning.
+
+ In case you skipped the 'test latest codebase' step in the guide, check its
+ description as for why the 'df [...]' and 'make -s kernelrelease [...]'
+ commands are here.
+
+ Important note: the latter command from this point on will print release
+ identifiers that might look odd or wrong to you -- which they are not, as it's
+ totally normal to see release identifiers like '6.0-rc1-local-gcafec0cacaca0'
+ if you bisect between versions 6.1 and 6.2 for example.
+
+ [:ref:`details <bisectbuild_bisref>`]
+
+.. _bisecttest_bissbs:
+
+* Now check if the feature that regressed works in the kernel you just built.
+
+ You again might want to start by making sure the kernel you booted is the one
+ you just built::
+
+ cd ~/linux/
+ tail -n 1 ~/kernels-built
+ uname -r
+
+ Now verify if the feature that regressed works at this kernel bisection point.
+ If it does, run this::
+
+ git bisect good
+
+ If it does not, run this::
+
+ git bisect bad
+
+ Be sure about what you tell Git, as getting this wrong just once will send the
+ rest of the bisection totally off course.
+
+ While the bisection is ongoing, Git will use the information you provided to
+ find and check out another bisection point for you to test. While doing so, it
+ will print something like 'Bisecting: 675 revisions left to test after this
+ (roughly 10 steps)' to indicate how many further changes it expects to be
+ tested. Now build and install another kernel using the instructions from the
+ previous step; afterwards follow the instructions in this step again.
+
+ Repeat this again and again until you finish the bisection -- that's the case
+ when Git after tagging a change as 'good' or 'bad' prints something like
+ 'cafecaca0c0dacafecaca0c0dacafecaca0c0da is the first bad commit'; right
+ afterwards it will show some details about the culprit including the patch
+ description of the change. The latter might fill your terminal screen, so you
+ might need to scroll up to see the message mentioning the culprit;
+ alternatively, run ``git bisect log > ~/bisection-log``.
+
+ [:ref:`details <bisecttest_bisref>`]
+
+.. _bisectlog_bissbs:
+
+* Store Git's bisection log and the current .config file in a safe place before
+ telling Git to reset the sources to the state before the bisection::
+
+ cd ~/linux/
+ git bisect log > ~/bisection-log
+ cp .config ~/bisection-config-culprit
+ git bisect reset
+
+ [:ref:`details <bisectlog_bisref>`]
+
+.. _revert_bissbs:
+
+* Try reverting the culprit on top of latest mainline to see if this fixes your
+ regression.
+
+ This is optional, as it might be impossible or hard to realize. The former is
+ the case, if the bisection determined a merge commit as the culprit; the
+ latter happens if other changes depend on the culprit. But if the revert
+ succeeds, it is worth building another kernel, as it validates the result of
+ a bisection, which can easily deroute; it furthermore will let kernel
+ developers know, if they can resolve the regression with a quick revert.
+
+ Begin by checking out the latest codebase depending on the range you bisected:
+
+ * Did you face a regression within a stable/longterm series (say between
+ 6.0.13 and 6.0.15) that does not happen in mainline? Then check out the
+ latest codebase for the affected series like this::
+
+ git fetch stable
+ git switch --discard-changes --detach linux-6.0.y
+
+ * In all other cases check out latest mainline::
+
+ git fetch mainline
+ git switch --discard-changes --detach mainline/master
+
+ If you bisected a regression within a stable/longterm series that also
+ happens in mainline, there is one more thing to do: look up the mainline
+ commit-id. To do so, use a command like ``git show abcdcafecabcd`` to
+ view the patch description of the culprit. There will be a line near
+ the top which looks like 'commit cafec0cacaca0 upstream.' or
+ 'Upstream commit cafec0cacaca0'; use that commit-id in the next command
+ and not the one the bisection blamed.
+
+ Now try reverting the culprit by specifying its commit id::
+
+ git revert --no-edit cafec0cacaca0
+
+ If that fails, give up trying and move on to the next step; if it works,
+ adjust the tag to facilitate the identification and prevent accidentally
+ overwriting another kernel::
+
+ cp ~/kernel-config-working .config
+ ./scripts/config --set-str CONFIG_LOCALVERSION '-local-cafec0cacaca0-reverted'
+
+ Build a kernel using the familiar command sequence, just without copying the
+ the base .config over::
+
+ make olddefconfig &&
+ make -j $(nproc --all)
+ # * Check if the free space suffices holding another kernel:
+ df -h /boot/ /lib/modules/
+ sudo make modules_install
+ command -v installkernel && sudo make install
+ make -s kernelrelease | tee -a ~/kernels-built
+ reboot
+
+ Now check one last time if the feature that made you perform a bisection works
+ with that kernel: if everything went well, it should not show the regression.
+
+ [:ref:`details <revert_bisref>`]
+
+.. _introclosure_bissbs:
+
+Complementary tasks: cleanup during and after the bisection
+-----------------------------------------------------------
+
+During and after following this guide you might want or need to remove some of
+the kernels you installed: the boot menu otherwise will become confusing or
+space might run out.
+
+.. _makeroom_bissbs:
+
+* To remove one of the kernels you installed, look up its 'kernelrelease'
+ identifier. This guide stores them in '~/kernels-built', but the following
+ command will print them as well::
+
+ ls -ltr /lib/modules/*-local*
+
+ You in most situations want to remove the oldest kernels built during the
+ actual bisection (e.g. segment 3 of this guide). The two ones you created
+ beforehand (e.g. to test the latest codebase and the version considered
+ 'good') might become handy to verify something later -- thus better keep them
+ around, unless you are really short on storage space.
+
+ To remove the modules of a kernel with the kernelrelease identifier
+ '*6.0-rc1-local-gcafec0cacaca0*', start by removing the directory holding its
+ modules::
+
+ sudo rm -rf /lib/modules/6.0-rc1-local-gcafec0cacaca0
+
+ Afterwards try the following command::
+
+ sudo kernel-install -v remove 6.0-rc1-local-gcafec0cacaca0
+
+ On quite a few distributions this will delete all other kernel files installed
+ while also removing the kernel's entry from the boot menu. But on some
+ distributions kernel-install does not exist or leaves boot-loader entries or
+ kernel image and related files behind; in that case remove them as described
+ in the reference section.
+
+ [:ref:`details <makeroom_bisref>`]
+
+.. _finishingtouch_bissbs:
+
+* Once you have finished the bisection, do not immediately remove anything you
+ set up, as you might need a few things again. What is safe to remove depends
+ on the outcome of the bisection:
+
+ * Could you initially reproduce the regression with the latest codebase and
+ after the bisection were able to fix the problem by reverting the culprit on
+ top of the latest codebase? Then you want to keep those two kernels around
+ for a while, but safely remove all others with a '-local' in the release
+ identifier.
+
+ * Did the bisection end on a merge-commit or seems questionable for other
+ reasons? Then you want to keep as many kernels as possible around for a few
+ days: it's pretty likely that you will be asked to recheck something.
+
+ * In other cases it likely is a good idea to keep the following kernels around
+ for some time: the one built from the latest codebase, the one created from
+ the version considered 'good', and the last three or four you compiled
+ during the actual bisection process.
+
+ [:ref:`details <finishingtouch_bisref>`]
+
+.. _introoptional_bissbs:
+
+Optional: test reverts, patches, or later versions
+--------------------------------------------------
+
+While or after reporting a bug, you might want or potentially will be asked to
+test reverts, debug patches, proposed fixes, or other versions. In that case
+follow these instructions.
+
+* Update your Git clone and check out the latest code.
+
+ * In case you want to test mainline, fetch its latest changes before checking
+ its code out::
+
+ git fetch mainline
+ git switch --discard-changes --detach mainline/master
+
+ * In case you want to test a stable or longterm kernel, first add the branch
+ holding the series you are interested in (6.2 in the example), unless you
+ already did so earlier::
+
+ git remote set-branches --add stable linux-6.2.y
+
+ Then fetch the latest changes and check out the latest version from the
+ series::
+
+ git fetch stable
+ git switch --discard-changes --detach stable/linux-6.2.y
+
+* Copy your kernel build configuration over::
+
+ cp ~/kernel-config-working .config
+
+* Your next step depends on what you want to do:
+
+ * In case you just want to test the latest codebase, head to the next step,
+ you are already all set.
+
+ * In case you want to test if a revert fixes an issue, revert one or multiple
+ changes by specifying their commit ids::
+
+ git revert --no-edit cafec0cacaca0
+
+ Now give that kernel a special tag to facilitates its identification and
+ prevent accidentally overwriting another kernel::
+
+ ./scripts/config --set-str CONFIG_LOCALVERSION '-local-cafec0cacaca0-reverted'
+
+ * In case you want to test a patch, store the patch in a file like
+ '/tmp/foobars-proposed-fix-v1.patch' and apply it like this::
+
+ git apply /tmp/foobars-proposed-fix-v1.patch
+
+ In case of multiple patches, repeat this step with the others.
+
+ Now give that kernel a special tag to facilitates its identification and
+ prevent accidentally overwriting another kernel::
+
+ ./scripts/config --set-str CONFIG_LOCALVERSION '-local-foobars-fix-v1'
+
+* Build a kernel using the familiar commands, just without copying the kernel
+ build configuration over, as that has been taken care of already::
+
+ make olddefconfig &&
+ make -j $(nproc --all)
+ # * Check if the free space suffices holding another kernel:
+ df -h /boot/ /lib/modules/
+ sudo make modules_install
+ command -v installkernel && sudo make install
+ make -s kernelrelease | tee -a ~/kernels-built
+ reboot
+
+* Now verify you booted the newly built kernel and check it.
+
+[:ref:`details <introoptional_bisref>`]
+
+.. _submit_improvements_vbbr:
+
+Conclusion
+----------
+
+You have reached the end of the step-by-step guide.
+
+Did you run into trouble following any of the above steps not cleared up by the
+reference section below? Did you spot errors? Or do you have ideas how to
+improve the guide?
+
+If any of that applies, please take a moment and let the maintainer of this
+document know by email (Thorsten Leemhuis <linux@leemhuis.info>), ideally while
+CCing the Linux docs mailing list (linux-doc@vger.kernel.org). Such feedback is
+vital to improve this text further, which is in everybody's interest, as it
+will enable more people to master the task described here -- and hopefully also
+improve similar guides inspired by this one.
+
+
+Reference section for the step-by-step guide
+============================================
+
+This section holds additional information for almost all the items in the above
+step-by-step guide.
+
+Preparations for building your own kernels
+------------------------------------------
+
+ *The steps in this section lay the groundwork for all further tests.*
+ [:ref:`... <introprep_bissbs>`]
+
+The steps in all later sections of this guide depend on those described here.
+
+[:ref:`back to step-by-step guide <introprep_bissbs>`].
+
+.. _backup_bisref:
+
+Prepare for emergencies
+~~~~~~~~~~~~~~~~~~~~~~~
+
+ *Create a fresh backup and put system repair and restore tools at hand.*
+ [:ref:`... <backup_bissbs>`]
+
+Remember, you are dealing with computers, which sometimes do unexpected things
+-- especially if you fiddle with crucial parts like the kernel of an operating
+system. That's what you are about to do in this process. Hence, better prepare
+for something going sideways, even if that should not happen.
+
+[:ref:`back to step-by-step guide <backup_bissbs>`]
+
+.. _vanilla_bisref:
+
+Remove anything related to externally maintained kernel modules
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ *Remove all software that depends on externally developed kernel drivers or
+ builds them automatically.* [:ref:`...<vanilla_bissbs>`]
+
+Externally developed kernel modules can easily cause trouble during a bisection.
+
+But there is a more important reason why this guide contains this step: most
+kernel developers will not care about reports about regressions occurring with
+kernels that utilize such modules. That's because such kernels are not
+considered 'vanilla' anymore, as Documentation/admin-guide/reporting-issues.rst
+explains in more detail.
+
+[:ref:`back to step-by-step guide <vanilla_bissbs>`]
+
+.. _secureboot_bisref:
+
+Deal with techniques like Secure Boot
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ *On platforms with 'Secure Boot' or similar techniques, prepare everything to
+ ensure the system will permit your self-compiled kernel to boot later.*
+ [:ref:`... <secureboot_bissbs>`]
+
+Many modern systems allow only certain operating systems to start; that's why
+they reject booting self-compiled kernels by default.
+
+You ideally deal with this by making your platform trust your self-built kernels
+with the help of a certificate. How to do that is not described
+here, as it requires various steps that would take the text too far away from
+its purpose; 'Documentation/admin-guide/module-signing.rst' and various web
+sides already explain everything needed in more detail.
+
+Temporarily disabling solutions like Secure Boot is another way to make your own
+Linux boot. On commodity x86 systems it is possible to do this in the BIOS Setup
+utility; the required steps vary a lot between machines and therefore cannot be
+described here.
+
+On mainstream x86 Linux distributions there is a third and universal option:
+disable all Secure Boot restrictions for your Linux environment. You can
+initiate this process by running ``mokutil --disable-validation``; this will
+tell you to create a one-time password, which is safe to write down. Now
+restart; right after your BIOS performed all self-tests the bootloader Shim will
+show a blue box with a message 'Press any key to perform MOK management'. Hit
+some key before the countdown exposes, which will open a menu. Choose 'Change
+Secure Boot state'. Shim's 'MokManager' will now ask you to enter three
+randomly chosen characters from the one-time password specified earlier. Once
+you provided them, confirm you really want to disable the validation.
+Afterwards, permit MokManager to reboot the machine.
+
+[:ref:`back to step-by-step guide <secureboot_bissbs>`]
+
+.. _bootworking_bisref:
+
+Boot the last kernel that was working
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ *Boot into the last working kernel and briefly recheck if the feature that
+ regressed really works.* [:ref:`...<bootworking_bissbs>`]
+
+This will make later steps that cover creating and trimming the configuration do
+the right thing.
+
+[:ref:`back to step-by-step guide <bootworking_bissbs>`]
+
+.. _diskspace_bisref:
+
+Space requirements
+~~~~~~~~~~~~~~~~~~
+
+ *Ensure to have enough free space for building Linux.*
+ [:ref:`... <diskspace_bissbs>`]
+
+The numbers mentioned are rough estimates with a big extra charge to be on the
+safe side, so often you will need less.
+
+If you have space constraints, be sure to hay attention to the :ref:`step about
+debug symbols' <debugsymbols_bissbs>` and its :ref:`accompanying reference
+section' <debugsymbols_bisref>`, as disabling then will reduce the consumed disk
+space by quite a few gigabytes.
+
+[:ref:`back to step-by-step guide <diskspace_bissbs>`]
+
+.. _rangecheck_bisref:
+
+Bisection range
+~~~~~~~~~~~~~~~
+
+ *Determine the kernel versions considered 'good' and 'bad' throughout this
+ guide.* [:ref:`...<rangecheck_bissbs>`]
+
+Establishing the range of commits to be checked is mostly straightforward,
+except when a regression occurred when switching from a release of one stable
+series to a release of a later series (e.g. from 6.0.13 to 6.1.5). In that case
+Git will need some hand holding, as there is no straight line of descent.
+
+That's because with the release of 6.0 mainline carried on to 6.1 while the
+stable series 6.0.y branched to the side. It's therefore theoretically possible
+that the issue you face with 6.1.5 only worked in 6.0.13, as it was fixed by a
+commit that went into one of the 6.0.y releases, but never hit mainline or the
+6.1.y series. Thankfully that normally should not happen due to the way the
+stable/longterm maintainers maintain the code. It's thus pretty safe to assume
+6.0 as a 'good' kernel. That assumption will be tested anyway, as that kernel
+will be built and tested in the segment '2' of this guide; Git would force you
+to do this as well, if you tried bisecting between 6.0.13 and 6.1.15.
+
+[:ref:`back to step-by-step guide <rangecheck_bissbs>`]
+
+.. _buildrequires_bisref:
+
+Install build requirements
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ *Install all software required to build a Linux kernel.*
+ [:ref:`...<buildrequires_bissbs>`]
+
+The kernel is pretty stand-alone, but besides tools like the compiler you will
+sometimes need a few libraries to build one. How to install everything needed
+depends on your Linux distribution and the configuration of the kernel you are
+about to build.
+
+Here are a few examples what you typically need on some mainstream
+distributions:
+
+* Arch Linux and derivatives::
+
+ sudo pacman --needed -S bc binutils bison flex gcc git kmod libelf openssl \
+ pahole perl zlib ncurses qt6-base
+
+* Debian, Ubuntu, and derivatives::
+
+ sudo apt install bc binutils bison dwarves flex gcc git kmod libelf-dev \
+ libssl-dev make openssl pahole perl-base pkg-config zlib1g-dev \
+ libncurses-dev qt6-base-dev g++
+
+* Fedora and derivatives::
+
+ sudo dnf install binutils \
+ /usr/bin/{bc,bison,flex,gcc,git,openssl,make,perl,pahole,rpmbuild} \
+ /usr/include/{libelf.h,openssl/pkcs7.h,zlib.h,ncurses.h,qt6/QtGui/QAction}
+
+* openSUSE and derivatives::
+
+ sudo zypper install bc binutils bison dwarves flex gcc git \
+ kernel-install-tools libelf-devel make modutils openssl openssl-devel \
+ perl-base zlib-devel rpm-build ncurses-devel qt6-base-devel
+
+These commands install a few packages that are often, but not always needed. You
+for example might want to skip installing the development headers for ncurses,
+which you will only need in case you later might want to adjust the kernel build
+configuration using make the targets 'menuconfig' or 'nconfig'; likewise omit
+the headers of Qt6 if you do not plan to adjust the .config using 'xconfig'.
+
+You furthermore might need additional libraries and their development headers
+for tasks not covered in this guide -- for example when building utilities from
+the kernel's tools/ directory.
+
+[:ref:`back to step-by-step guide <buildrequires_bissbs>`]
+
+.. _sources_bisref:
+
+Download the sources using Git
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ *Retrieve the Linux mainline sources.*
+ [:ref:`...<sources_bissbs>`]
+
+The step-by-step guide outlines how to download the Linux sources using a full
+Git clone of Linus' mainline repository. There is nothing more to say about
+that -- but there are two alternatives ways to retrieve the sources that might
+work better for you:
+
+* If you have an unreliable internet connection, consider
+ :ref:`using a 'Git bundle'<sources_bundle_bisref>`.
+
+* If downloading the complete repository would take too long or requires too
+ much storage space, consider :ref:`using a 'shallow
+ clone'<sources_shallow_bisref>`.
+
+.. _sources_bundle_bisref:
+
+Downloading Linux mainline sources using a bundle
+"""""""""""""""""""""""""""""""""""""""""""""""""
+
+Use the following commands to retrieve the Linux mainline sources using a
+bundle::
+
+ wget -c \
+ https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/clone.bundle
+ git clone --no-checkout clone.bundle ~/linux/
+ cd ~/linux/
+ git remote remove origin
+ git remote add mainline \
+ https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
+ git fetch mainline
+ git remote add -t master stable \
+ https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
+
+In case the 'wget' command fails, just re-execute it, it will pick up where
+it left off.
+
+[:ref:`back to step-by-step guide <sources_bissbs>`]
+[:ref:`back to section intro <sources_bisref>`]
+
+.. _sources_shallow_bisref:
+
+Downloading Linux mainline sources using a shallow clone
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+First, execute the following command to retrieve the latest mainline codebase::
+
+ git clone -o mainline --no-checkout --depth 1 -b master \
+ https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git ~/linux/
+ cd ~/linux/
+ git remote add -t master stable \
+ https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
+
+Now deepen your clone's history to the second predecessor of the mainline
+release of your 'good' version. In case the latter are 6.0 or 6.0.13, 5.19 would
+be the first predecessor and 5.18 the second -- hence deepen the history up to
+that version::
+
+ git fetch --shallow-exclude=v5.18 mainline
+
+Afterwards add the stable Git repository as remote and all required stable
+branches as explained in the step-by-step guide.
+
+Note, shallow clones have a few peculiar characteristics:
+
+* For bisections the history needs to be deepened a few mainline versions
+ farther than it seems necessary, as explained above already. That's because
+ Git otherwise will be unable to revert or describe most of the commits within
+ a range (say 6.1..6.2), as they are internally based on earlier kernels
+ releases (like 6.0-rc2 or 5.19-rc3).
+
+* This document in most places uses ``git fetch`` with ``--shallow-exclude=``
+ to specify the earliest version you care about (or to be precise: its git
+ tag). You alternatively can use the parameter ``--shallow-since=`` to specify
+ an absolute (say ``'2023-07-15'``) or relative (``'12 months'``) date to
+ define the depth of the history you want to download. When using them while
+ bisecting mainline, ensure to deepen the history to at least 7 months before
+ the release of the mainline release your 'good' kernel is based on.
+
+* Be warned, when deepening your clone you might encounter an error like
+ 'fatal: error in object: unshallow cafecaca0c0dacafecaca0c0dacafecaca0c0da'.
+ In that case run ``git repack -d`` and try again.
+
+[:ref:`back to step-by-step guide <sources_bissbs>`]
+[:ref:`back to section intro <sources_bisref>`]
+
+.. _oldconfig_bisref:
+
+Start defining the build configuration for your kernel
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ *Start preparing a kernel build configuration (the '.config' file).*
+ [:ref:`... <oldconfig_bissbs>`]
+
+*Note, this is the first of multiple steps in this guide that create or modify
+build artifacts. The commands used in this guide store them right in the source
+tree to keep things simple. In case you prefer storing the build artifacts
+separately, create a directory like '~/linux-builddir/' and add the parameter
+``O=~/linux-builddir/`` to all make calls used throughout this guide. You will
+have to point other commands there as well -- among them the ``./scripts/config
+[...]`` commands, which will require ``--file ~/linux-builddir/.config`` to
+locate the right build configuration.*
+
+Two things can easily go wrong when creating a .config file as advised:
+
+* The oldconfig target will use a .config file from your build directory, if
+ one is already present there (e.g. '~/linux/.config'). That's totally fine if
+ that's what you intend (see next step), but in all other cases you want to
+ delete it. This for example is important in case you followed this guide
+ further, but due to problems come back here to redo the configuration from
+ scratch.
+
+* Sometimes olddefconfig is unable to locate the .config file for your running
+ kernel and will use defaults, as briefly outlined in the guide. In that case
+ check if your distribution ships the configuration somewhere and manually put
+ it in the right place (e.g. '~/linux/.config') if it does. On distributions
+ where /proc/config.gz exists this can be achieved using this command::
+
+ zcat /proc/config.gz > .config
+
+ Once you put it there, run ``make olddefconfig`` again to adjust it to the
+ needs of the kernel about to be built.
+
+Note, the olddefconfig target will set any undefined build options to their
+default value. If you prefer to set such configuration options manually, use
+``make oldconfig`` instead. Then for each undefined configuration option you
+will be asked how to proceed; in case you are unsure what to answer, simply hit
+'enter' to apply the default value. Note though that for bisections you normally
+want to go with the defaults, as you otherwise might enable a new feature that
+causes a problem looking like regressions (for example due to security
+restrictions).
+
+Occasionally odd things happen when trying to use a config file prepared for one
+kernel (say 6.1) on an older mainline release -- especially if it is much older
+(say 5.15). That's one of the reasons why the previous step in the guide told
+you to boot the kernel where everything works. If you manually add a .config
+file you thus want to ensure it's from the working kernel and not from a one
+that shows the regression.
+
+In case you want to build kernels for another machine, locate its kernel build
+configuration; usually ``ls /boot/config-$(uname -r)`` will print its name. Copy
+that file to the build machine and store it as ~/linux/.config; afterwards run
+``make olddefconfig`` to adjust it.
+
+[:ref:`back to step-by-step guide <oldconfig_bissbs>`]
+
+.. _localmodconfig_bisref:
+
+Trim the build configuration for your kernel
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ *Disable any kernel modules apparently superfluous for your setup.*
+ [:ref:`... <localmodconfig_bissbs>`]
+
+As explained briefly in the step-by-step guide already: with localmodconfig it
+can easily happen that your self-built kernels will lack modules for tasks you
+did not perform at least once before utilizing this make target. That happens
+when a task requires kernel modules which are only autoloaded when you execute
+it for the first time. So when you never performed that task since starting your
+kernel the modules will not have been loaded -- and from localmodconfig's point
+of view look superfluous, which thus disables them to reduce the amount of code
+to be compiled.
+
+You can try to avoid this by performing typical tasks that often will autoload
+additional kernel modules: start a VM, establish VPN connections, loop-mount a
+CD/DVD ISO, mount network shares (CIFS, NFS, ...), and connect all external
+devices (2FA keys, headsets, webcams, ...) as well as storage devices with file
+systems you otherwise do not utilize (btrfs, ext4, FAT, NTFS, XFS, ...). But it
+is hard to think of everything that might be needed -- even kernel developers
+often forget one thing or another at this point.
+
+Do not let that risk bother you, especially when compiling a kernel only for
+testing purposes: everything typically crucial will be there. And if you forget
+something important you can turn on a missing feature manually later and quickly
+run the commands again to compile and install a kernel that has everything you
+need.
+
+But if you plan to build and use self-built kernels regularly, you might want to
+reduce the risk by recording which modules your system loads over the course of
+a few weeks. You can automate this with `modprobed-db
+<https://github.com/graysky2/modprobed-db>`_. Afterwards use ``LSMOD=<path>`` to
+point localmodconfig to the list of modules modprobed-db noticed being used::
+
+ yes '' | make LSMOD='${HOME}'/.config/modprobed.db localmodconfig
+
+That parameter also allows you to build trimmed kernels for another machine in
+case you copied a suitable .config over to use as base (see previous step). Just
+run ``lsmod > lsmod_foo-machine`` on that system and copy the generated file to
+your build's host home directory. Then run these commands instead of the one the
+step-by-step guide mentions::
+
+ yes '' | make LSMOD=~/lsmod_foo-machine localmodconfig
+
+[:ref:`back to step-by-step guide <localmodconfig_bissbs>`]
+
+.. _tagging_bisref:
+
+Tag the kernels about to be build
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ *Ensure all the kernels you will build are clearly identifiable using a
+ special tag and a unique version identifier.* [:ref:`... <tagging_bissbs>`]
+
+This allows you to differentiate your distribution's kernels from those created
+during this process, as the file or directories for the latter will contain
+'-local' in the name; it also helps picking the right entry in the boot menu and
+not lose track of you kernels, as their version numbers will look slightly
+confusing during the bisection.
+
+[:ref:`back to step-by-step guide <tagging_bissbs>`]
+
+.. _debugsymbols_bisref:
+
+Decide to enable or disable debug symbols
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ *Decide how to handle debug symbols.* [:ref:`... <debugsymbols_bissbs>`]
+
+Having debug symbols available can be important when your kernel throws a
+'panic', 'Oops', 'warning', or 'BUG' later when running, as then you will be
+able to find the exact place where the problem occurred in the code. But
+collecting and embedding the needed debug information takes time and consumes
+quite a bit of space: in late 2022 the build artifacts for a typical x86 kernel
+trimmed with localmodconfig consumed around 5 Gigabyte of space with debug
+symbols, but less than 1 when they were disabled. The resulting kernel image and
+modules are bigger as well, which increases storage requirements for /boot/ and
+load times.
+
+In case you want a small kernel and are unlikely to decode a stack trace later,
+you thus might want to disable debug symbols to avoid those downsides. If it
+later turns out that you need them, just enable them as shown and rebuild the
+kernel.
+
+You on the other hand definitely want to enable them for this process, if there
+is a decent chance that you need to decode a stack trace later. The section
+'Decode failure messages' in Documentation/admin-guide/reporting-issues.rst
+explains this process in more detail.
+
+[:ref:`back to step-by-step guide <debugsymbols_bissbs>`]
+
+.. _configmods_bisref:
+
+Adjust build configuration
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ *Check if you may want or need to adjust some other kernel configuration
+ options:*
+
+Depending on your needs you at this point might want or have to adjust some
+kernel configuration options.
+
+.. _configmods_distros_bisref:
+
+Distro specific adjustments
+"""""""""""""""""""""""""""
+
+ *Are you running* [:ref:`... <configmods_bissbs>`]
+
+The following sections help you to avoid build problems that are known to occur
+when following this guide on a few commodity distributions.
+
+**Debian:**
+
+* Remove a stale reference to a certificate file that would cause your build to
+ fail::
+
+ ./scripts/config --set-str SYSTEM_TRUSTED_KEYS ''
+
+ Alternatively, download the needed certificate and make that configuration
+ option point to it, as `the Debian handbook explains in more detail
+ <https://debian-handbook.info/browse/stable/sect.kernel-compilation.html>`_
+ -- or generate your own, as explained in
+ Documentation/admin-guide/module-signing.rst.
+
+[:ref:`back to step-by-step guide <configmods_bissbs>`]
+
+.. _configmods_individual_bisref:
+
+Individual adjustments
+""""""""""""""""""""""
+
+ *If you want to influence the other aspects of the configuration, do so
+ now.* [:ref:`... <configmods_bissbs>`]
+
+At this point you can use a command like ``make menuconfig`` or ``make nconfig``
+to enable or disable certain features using a text-based user interface; to use
+a graphical configuration utility, run ``make xconfig`` instead. Both of them
+require development libraries from toolkits they are rely on (ncurses
+respectively Qt5 or Qt6); an error message will tell you if something required
+is missing.
+
+[:ref:`back to step-by-step guide <configmods_bissbs>`]
+
+.. _saveconfig_bisref:
+
+Put the .config file aside
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ *Reprocess the .config after the latest changes and store it in a safe place.*
+ [:ref:`... <saveconfig_bissbs>`]
+
+Put the .config you prepared aside, as you want to copy it back to the build
+directory every time during this guide before you start building another
+kernel. That's because going back and forth between different versions can alter
+.config files in odd ways; those occasionally cause side effects that could
+confuse testing or in some cases render the result of your bisection
+meaningless.
+
+[:ref:`back to step-by-step guide <saveconfig_bissbs>`]
+
+.. _introlatestcheck_bisref:
+
+Try to reproduce the problem with the latest codebase
+-----------------------------------------------------
+
+ *Verify the regression is not caused by some .config change and check if it
+ still occurs with the latest codebase.* [:ref:`... <introlatestcheck_bissbs>`]
+
+For some readers it might seem unnecessary to check the latest codebase at this
+point, especially if you did that already with a kernel prepared by your
+distributor or face a regression within a stable/longterm series. But it's
+highly recommended for these reasons:
+
+* You will run into any problems caused by your setup before you actually begin
+ a bisection. That will make it a lot easier to differentiate between 'this
+ most likely is some problem in my setup' and 'this change needs to be skipped
+ during the bisection, as the kernel sources at that stage contain an unrelated
+ problem that causes building or booting to fail'.
+
+* These steps will rule out if your problem is caused by some change in the
+ build configuration between the 'working' and the 'broken' kernel. This for
+ example can happen when your distributor enabled an additional security
+ feature in the newer kernel which was disabled or not yet supported by the
+ older kernel. That security feature might get into the way of something you
+ do -- in which case your problem from the perspective of the Linux kernel
+ upstream developers is not a regression, as
+ Documentation/admin-guide/reporting-regressions.rst explains in more detail.
+ You thus would waste your time if you'd try to bisect this.
+
+* If the cause for your regression was already fixed in the latest mainline
+ codebase, you'd perform the bisection for nothing. This holds true for a
+ regression you encountered with a stable/longterm release as well, as they are
+ often caused by problems in mainline changes that were backported -- in which
+ case the problem will have to be fixed in mainline first. Maybe it already was
+ fixed there and the fix is already in the process of being backported.
+
+* For regressions within a stable/longterm series it's furthermore crucial to
+ know if the issue is specific to that series or also happens in the mainline
+ kernel, as the report needs to be sent to different people:
+
+ * Regressions specific to a stable/longterm series are the stable team's
+ responsibility; mainline Linux developers might or might not care.
+
+ * Regressions also happening in mainline are something the regular Linux
+ developers and maintainers have to handle; the stable team does not care
+ and does not need to be involved in the report, they just should be told
+ to backport the fix once it's ready.
+
+ Your report might be ignored if you send it to the wrong party -- and even
+ when you get a reply there is a decent chance that developers tell you to
+ evaluate which of the two cases it is before they take a closer look.
+
+[:ref:`back to step-by-step guide <introlatestcheck_bissbs>`]
+
+.. _checkoutmaster_bisref:
+
+Check out the latest Linux codebase
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ *Check out the latest Linux codebase.*
+ [:ref:`... <checkoutmaster_bissbs>`]
+
+In case you later want to recheck if an ever newer codebase might fix the
+problem, remember to run that ``git fetch --shallow-exclude [...]`` command
+again mentioned earlier to update your local Git repository.
+
+[:ref:`back to step-by-step guide <checkoutmaster_bissbs>`]
+
+.. _build_bisref:
+
+Build your kernel
+~~~~~~~~~~~~~~~~~
+
+ *Build the image and the modules of your first kernel using the config file
+ you prepared.* [:ref:`... <build_bissbs>`]
+
+A lot can go wrong at this stage, but the instructions below will help you help
+yourself. Another subsection explains how to directly package your kernel up as
+deb, rpm or tar file.
+
+Dealing with build errors
+"""""""""""""""""""""""""
+
+When a build error occurs, it might be caused by some aspect of your machine's
+setup that often can be fixed quickly; other times though the problem lies in
+the code and can only be fixed by a developer. A close examination of the
+failure messages coupled with some research on the internet will often tell you
+which of the two it is. To perform such investigation, restart the build
+process like this::
+
+ make V=1
+
+The ``V=1`` activates verbose output, which might be needed to see the actual
+error. To make it easier to spot, this command also omits the ``-j $(nproc
+--all)`` used earlier to utilize every CPU core in the system for the job -- but
+this parallelism also results in some clutter when failures occur.
+
+After a few seconds the build process should run into the error again. Now try
+to find the most crucial line describing the problem. Then search the internet
+for the most important and non-generic section of that line (say 4 to 8 words);
+avoid or remove anything that looks remotely system-specific, like your username
+or local path names like ``/home/username/linux/``. First try your regular
+internet search engine with that string, afterwards search Linux kernel mailing
+lists via `lore.kernel.org/all/ <https://lore.kernel.org/all/>`_.
+
+This most of the time will find something that will explain what is wrong; quite
+often one of the hits will provide a solution for your problem, too. If you
+do not find anything that matches your problem, try again from a different angle
+by modifying your search terms or using another line from the error messages.
+
+In the end, most issues you run into have likely been encountered and
+reported by others already. That includes issues where the cause is not your
+system, but lies in the code. If you run into one of those, you might thus find
+a solution (e.g. a patch) or workaround for your issue, too.
+
+Package your kernel up
+""""""""""""""""""""""
+
+The step-by-step guide uses the default make targets (e.g. 'bzImage' and
+'modules' on x86) to build the image and the modules of your kernel, which later
+steps of the guide then install. You instead can also directly build everything
+and directly package it up by using one of the following targets:
+
+* ``make -j $(nproc --all) bindeb-pkg`` to generate a deb package
+
+* ``make -j $(nproc --all) binrpm-pkg`` to generate a rpm package
+
+* ``make -j $(nproc --all) tarbz2-pkg`` to generate a bz2 compressed tarball
+
+This is just a selection of available make targets for this purpose, see
+``make help`` for others. You can also use these targets after running
+``make -j $(nproc --all)``, as they will pick up everything already built.
+
+If you employ the targets to generate deb or rpm packages, ignore the
+step-by-step guide's instructions on installing and removing your kernel;
+instead install and remove the packages using the package utility for the format
+(e.g. dpkg and rpm) or a package management utility build on top of them (apt,
+aptitude, dnf/yum, zypper, ...). Be aware that the packages generated using
+these two make targets are designed to work on various distributions utilizing
+those formats, they thus will sometimes behave differently than your
+distribution's kernel packages.
+
+[:ref:`back to step-by-step guide <build_bissbs>`]
+
+.. _install_bisref:
+
+Put the kernel in place
+~~~~~~~~~~~~~~~~~~~~~~~
+
+ *Install the kernel you just built.* [:ref:`... <install_bissbs>`]
+
+What you need to do after executing the command in the step-by-step guide
+depends on the existence and the implementation of ``/sbin/installkernel``
+executable on your distribution.
+
+If installkernel is found, the kernel's build system will delegate the actual
+installation of your kernel image to this executable, which then performs some
+or all of these tasks:
+
+* On almost all Linux distributions installkernel will store your kernel's
+ image in /boot/, usually as '/boot/vmlinuz-<kernelrelease_id>'; often it will
+ put a 'System.map-<kernelrelease_id>' alongside it.
+
+* On most distributions installkernel will then generate an 'initramfs'
+ (sometimes also called 'initrd'), which usually are stored as
+ '/boot/initramfs-<kernelrelease_id>.img' or
+ '/boot/initrd-<kernelrelease_id>'. Commodity distributions rely on this file
+ for booting, hence ensure to execute the make target 'modules_install' first,
+ as your distribution's initramfs generator otherwise will be unable to find
+ the modules that go into the image.
+
+* On some distributions installkernel will then add an entry for your kernel
+ to your bootloader's configuration.
+
+You have to take care of some or all of the tasks yourself, if your
+distribution lacks an installkernel script or does only handle part of them.
+Consult the distribution's documentation for details. If in doubt, install the
+kernel manually::
+
+ sudo install -m 0600 $(make -s image_name) /boot/vmlinuz-$(make -s kernelrelease)
+ sudo install -m 0600 System.map /boot/System.map-$(make -s kernelrelease)
+
+Now generate your initramfs using the tools your distribution provides for this
+process. Afterwards add your kernel to your bootloader configuration and reboot.
+
+[:ref:`back to step-by-step guide <install_bissbs>`]
+
+.. _storagespace_bisref:
+
+Storage requirements per kernel
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ *Check how much storage space the kernel, its modules, and other related files
+ like the initramfs consume.* [:ref:`... <storagespace_bissbs>`]
+
+The kernels built during a bisection consume quite a bit of space in /boot/ and
+/lib/modules/, especially if you enabled debug symbols. That makes it easy to
+fill up volumes during a bisection -- and due to that even kernels which used to
+work earlier might fail to boot. To prevent that you will need to know how much
+space each installed kernel typically requires.
+
+Note, most of the time the pattern '/boot/*$(make -s kernelrelease)*' used in
+the guide will match all files needed to boot your kernel -- but neither the
+path nor the naming scheme are mandatory. On some distributions you thus will
+need to look in different places.
+
+[:ref:`back to step-by-step guide <storagespace_bissbs>`]
+
+.. _tainted_bisref:
+
+Check if your newly built kernel considers itself 'tainted'
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ *Check if the kernel marked itself as 'tainted'.*
+ [:ref:`... <tainted_bissbs>`]
+
+Linux marks itself as tainted when something happens that potentially leads to
+follow-up errors that look totally unrelated. That is why developers might
+ignore or react scantly to reports from tainted kernels -- unless of course the
+kernel set the flag right when the reported bug occurred.
+
+That's why you want check why a kernel is tainted as explained in
+Documentation/admin-guide/tainted-kernels.rst; doing so is also in your own
+interest, as your testing might be flawed otherwise.
+
+[:ref:`back to step-by-step guide <tainted_bissbs>`]
+
+.. _recheckbroken_bisref:
+
+Check the kernel built from a recent mainline codebase
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ *Verify if your bug occurs with the newly built kernel.*
+ [:ref:`... <recheckbroken_bissbs>`]
+
+There are a couple of reasons why your bug or regression might not show up with
+the kernel you built from the latest codebase. These are the most frequent:
+
+* The bug was fixed meanwhile.
+
+* What you suspected to be a regression was caused by a change in the build
+ configuration the provider of your kernel carried out.
+
+* Your problem might be a race condition that does not show up with your kernel;
+ the trimmed build configuration, a different setting for debug symbols, the
+ compiler used, and various other things can cause this.
+
+* In case you encountered the regression with a stable/longterm kernel it might
+ be a problem that is specific to that series; the next step in this guide will
+ check this.
+
+[:ref:`back to step-by-step guide <recheckbroken_bissbs>`]
+
+.. _recheckstablebroken_bisref:
+
+Check the kernel built from the latest stable/longterm codebase
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ *Are you facing a regression within a stable/longterm release, but failed to
+ reproduce it with the kernel you just built using the latest mainline sources?
+ Then check if the latest codebase for the particular series might already fix
+ the problem.* [:ref:`... <recheckstablebroken_bissbs>`]
+
+If this kernel does not show the regression either, there most likely is no need
+for a bisection.
+
+[:ref:`back to step-by-step guide <recheckstablebroken_bissbs>`]
+
+.. _introworkingcheck_bisref:
+
+Ensure the 'good' version is really working well
+------------------------------------------------
+
+ *Check if the kernels you build work fine.*
+ [:ref:`... <introworkingcheck_bissbs>`]
+
+This section will reestablish a known working base. Skipping it might be
+appealing, but is usually a bad idea, as it does something important:
+
+It will ensure the .config file you prepared earlier actually works as expected.
+That is in your own interest, as trimming the configuration is not foolproof --
+and you might be building and testing ten or more kernels for nothing before
+starting to suspect something might be wrong with the build configuration.
+
+That alone is reason enough to spend the time on this, but not the only reason.
+
+Many readers of this guide normally run kernels that are patched, use add-on
+modules, or both. Those kernels thus are not considered 'vanilla' -- therefore
+it's possible that the thing that regressed might never have worked in vanilla
+builds of the 'good' version in the first place.
+
+There is a third reason for those that noticed a regression between
+stable/longterm kernels of different series (e.g. 6.0.13..6.1.5): it will
+ensure the kernel version you assumed to be 'good' earlier in the process (e.g.
+6.0) actually is working.
+
+[:ref:`back to step-by-step guide <introworkingcheck_bissbs>`]
+
+.. _recheckworking_bisref:
+
+Build your own version of the 'good' kernel
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ *Build your own variant of the working kernel and check if the feature that
+ regressed works as expected with it.* [:ref:`... <recheckworking_bissbs>`]
+
+In case the feature that broke with newer kernels does not work with your first
+self-built kernel, find and resolve the cause before moving on. There are a
+multitude of reasons why this might happen. Some ideas where to look:
+
+* Check the taint status and the output of ``dmesg``, maybe something unrelated
+ went wrong.
+
+* Maybe localmodconfig did something odd and disabled the module required to
+ test the feature? Then you might want to recreate a .config file based on the
+ one from the last working kernel and skip trimming it down; manually disabling
+ some features in the .config might work as well to reduce the build time.
+
+* Maybe it's not a kernel regression and something that is caused by some fluke,
+ a broken initramfs (also known as initrd), new firmware files, or an updated
+ userland software?
+
+* Maybe it was a feature added to your distributor's kernel which vanilla Linux
+ at that point never supported?
+
+Note, if you found and fixed problems with the .config file, you want to use it
+to build another kernel from the latest codebase, as your earlier tests with
+mainline and the latest version from an affected stable/longterm series were
+most likely flawed.
+
+[:ref:`back to step-by-step guide <recheckworking_bissbs>`]
+
+Perform a bisection and validate the result
+-------------------------------------------
+
+ *With all the preparations and precaution builds taken care of, you are now
+ ready to begin the bisection.* [:ref:`... <introbisect_bissbs>`]
+
+The steps in this segment perform and validate the bisection.
+
+[:ref:`back to step-by-step guide <introbisect_bissbs>`].
+
+.. _bisectstart_bisref:
+
+Start the bisection
+~~~~~~~~~~~~~~~~~~~
+
+ *Start the bisection and tell Git about the versions earlier established as
+ 'good' and 'bad'.* [:ref:`... <bisectstart_bissbs>`]
+
+This will start the bisection process; the last of the commands will make Git
+check out a commit round about half-way between the 'good' and the 'bad' changes
+for you to test.
+
+[:ref:`back to step-by-step guide <bisectstart_bissbs>`]
+
+.. _bisectbuild_bisref:
+
+Build a kernel from the bisection point
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ *Build, install, and boot a kernel from the code Git checked out using the
+ same commands you used earlier.* [:ref:`... <bisectbuild_bissbs>`]
+
+There are two things worth of note here:
+
+* Occasionally building the kernel will fail or it might not boot due some
+ problem in the code at the bisection point. In that case run this command::
+
+ git bisect skip
+
+ Git will then check out another commit nearby which with a bit of luck should
+ work better. Afterwards restart executing this step.
+
+* Those slightly odd looking version identifiers can happen during bisections,
+ because the Linux kernel subsystems prepare their changes for a new mainline
+ release (say 6.2) before its predecessor (e.g. 6.1) is finished. They thus
+ base them on a somewhat earlier point like 6.1-rc1 or even 6.0 -- and then
+ get merged for 6.2 without rebasing nor squashing them once 6.1 is out. This
+ leads to those slightly odd looking version identifiers coming up during
+ bisections.
+
+[:ref:`back to step-by-step guide <bisectbuild_bissbs>`]
+
+.. _bisecttest_bisref:
+
+Bisection checkpoint
+~~~~~~~~~~~~~~~~~~~~
+
+ *Check if the feature that regressed works in the kernel you just built.*
+ [:ref:`... <bisecttest_bissbs>`]
+
+Ensure what you tell Git is accurate: getting it wrong just one time will bring
+the rest of the bisection totally off course, hence all testing after that point
+will be for nothing.
+
+[:ref:`back to step-by-step guide <bisecttest_bissbs>`]
+
+.. _bisectlog_bisref:
+
+Put the bisection log away
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ *Store Git's bisection log and the current .config file in a safe place.*
+ [:ref:`... <bisectlog_bissbs>`]
+
+As indicated above: declaring just one kernel wrongly as 'good' or 'bad' will
+render the end result of a bisection useless. In that case you'd normally have
+to restart the bisection from scratch. The log can prevent that, as it might
+allow someone to point out where a bisection likely went sideways -- and then
+instead of testing ten or more kernels you might only have to build a few to
+resolve things.
+
+The .config file is put aside, as there is a decent chance that developers might
+ask for it after you report the regression.
+
+[:ref:`back to step-by-step guide <bisectlog_bissbs>`]
+
+.. _revert_bisref:
+
+Try reverting the culprit
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ *Try reverting the culprit on top of the latest codebase to see if this fixes
+ your regression.* [:ref:`... <revert_bissbs>`]
+
+This is an optional step, but whenever possible one you should try: there is a
+decent chance that developers will ask you to perform this step when you bring
+the bisection result up. So give it a try, you are in the flow already, building
+one more kernel shouldn't be a big deal at this point.
+
+The step-by-step guide covers everything relevant already except one slightly
+rare thing: did you bisected a regression that also happened with mainline using
+a stable/longterm series, but Git failed to revert the commit in mainline? Then
+try to revert the culprit in the affected stable/longterm series -- and if that
+succeeds, test that kernel version instead.
+
+[:ref:`back to step-by-step guide <revert_bissbs>`]
+
+Cleanup steps during and after following this guide
+---------------------------------------------------
+
+ *During and after following this guide you might want or need to remove some
+ of the kernels you installed.* [:ref:`... <introclosure_bissbs>`]
+
+The steps in this section describe clean-up procedures.
+
+[:ref:`back to step-by-step guide <introclosure_bissbs>`].
+
+.. _makeroom_bisref:
+
+Cleaning up during the bisection
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ *To remove one of the kernels you installed, look up its 'kernelrelease'
+ identifier.* [:ref:`... <makeroom_bissbs>`]
+
+The kernels you install during this process are easy to remove later, as its
+parts are only stored in two places and clearly identifiable. You thus do not
+need to worry to mess up your machine when you install a kernel manually (and
+thus bypass your distribution's packaging system): all parts of your kernels are
+relatively easy to remove later.
+
+One of the two places is a directory in /lib/modules/, which holds the modules
+for each installed kernel. This directory is named after the kernel's release
+identifier; hence, to remove all modules for one of the kernels you built,
+simply remove its modules directory in /lib/modules/.
+
+The other place is /boot/, where typically two up to five files will be placed
+during installation of a kernel. All of them usually contain the release name in
+their file name, but how many files and their exact names depend somewhat on
+your distribution's installkernel executable and its initramfs generator. On
+some distributions the ``kernel-install remove...`` command mentioned in the
+step-by-step guide will delete all of these files for you while also removing
+the menu entry for the kernel from your bootloader configuration. On others you
+have to take care of these two tasks yourself. The following command should
+interactively remove the three main files of a kernel with the release name
+'6.0-rc1-local-gcafec0cacaca0'::
+
+ rm -i /boot/{System.map,vmlinuz,initr}-6.0-rc1-local-gcafec0cacaca0
+
+Afterwards check for other files in /boot/ that have
+'6.0-rc1-local-gcafec0cacaca0' in their name and consider deleting them as well.
+Now remove the boot entry for the kernel from your bootloader's configuration;
+the steps to do that vary quite a bit between Linux distributions.
+
+Note, be careful with wildcards like '*' when deleting files or directories
+for kernels manually: you might accidentally remove files of a 6.0.13 kernel
+when all you want is to remove 6.0 or 6.0.1.
+
+[:ref:`back to step-by-step guide <makeroom_bissbs>`]
+
+Cleaning up after the bisection
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. _finishingtouch_bisref:
+
+ *Once you have finished the bisection, do not immediately remove anything
+ you set up, as you might need a few things again.*
+ [:ref:`... <finishingtouch_bissbs>`]
+
+When you are really short of storage space removing the kernels as described in
+the step-by-step guide might not free as much space as you would like. In that
+case consider running ``rm -rf ~/linux/*`` as well now. This will remove the
+build artifacts and the Linux sources, but will leave the Git repository
+(~/linux/.git/) behind -- a simple ``git reset --hard`` thus will bring the
+sources back.
+
+Removing the repository as well would likely be unwise at this point: there
+is a decent chance developers will ask you to build another kernel to
+perform additional tests -- like testing a debug patch or a proposed fix.
+Details on how to perform those can be found in the section :ref:`Optional
+tasks: test reverts, patches, or later versions <introoptional_bissbs>`.
+
+Additional tests are also the reason why you want to keep the
+~/kernel-config-working file around for a few weeks.
+
+[:ref:`back to step-by-step guide <finishingtouch_bissbs>`]
+
+.. _introoptional_bisref:
+
+Test reverts, patches, or later versions
+----------------------------------------
+
+ *While or after reporting a bug, you might want or potentially will be asked
+ to test reverts, patches, proposed fixes, or other versions.*
+ [:ref:`... <introoptional_bissbs>`]
+
+All the commands used in this section should be pretty straight forward, so
+there is not much to add except one thing: when setting a kernel tag as
+instructed, ensure it is not much longer than the one used in the example, as
+problems will arise if the kernelrelease identifier exceeds 63 characters.
+
+[:ref:`back to step-by-step guide <introoptional_bissbs>`].
+
+
+Additional information
+======================
+
+.. _buildhost_bis:
+
+Build kernels on a different machine
+------------------------------------
+
+To compile kernels on another system, slightly alter the step-by-step guide's
+instructions:
+
+* Start following the guide on the machine where you want to install and test
+ the kernels later.
+
+* After executing ':ref:`Boot into the working kernel and briefly use the
+ apparently broken feature <bootworking_bissbs>`', save the list of loaded
+ modules to a file using ``lsmod > ~/test-machine-lsmod``. Then locate the
+ build configuration for the running kernel (see ':ref:`Start defining the
+ build configuration for your kernel <oldconfig_bisref>`' for hints on where
+ to find it) and store it as '~/test-machine-config-working'. Transfer both
+ files to the home directory of your build host.
+
+* Continue the guide on the build host (e.g. with ':ref:`Ensure to have enough
+ free space for building [...] <diskspace_bissbs>`').
+
+* When you reach ':ref:`Start preparing a kernel build configuration[...]
+ <oldconfig_bissbs>`': before running ``make olddefconfig`` for the first time,
+ execute the following command to base your configuration on the one from the
+ test machine's 'working' kernel::
+
+ cp ~/test-machine-config-working ~/linux/.config
+
+* During the next step to ':ref:`disable any apparently superfluous kernel
+ modules <localmodconfig_bissbs>`' use the following command instead::
+
+ yes '' | make localmodconfig LSMOD=~/lsmod_foo-machine localmodconfig
+
+* Continue the guide, but ignore the instructions outlining how to compile,
+ install, and reboot into a kernel every time they come up. Instead build
+ like this::
+
+ cp ~/kernel-config-working .config
+ make olddefconfig &&
+ make -j $(nproc --all) targz-pkg
+
+ This will generate a gzipped tar file whose name is printed in the last
+ line shown; for example, a kernel with the kernelrelease identifier
+ '6.0.0-rc1-local-g928a87efa423' built for x86 machines usually will
+ be stored as '~/linux/linux-6.0.0-rc1-local-g928a87efa423-x86.tar.gz'.
+
+ Copy that file to your test machine's home directory.
+
+* Switch to the test machine to check if you have enough space to hold another
+ kernel. Then extract the file you transferred::
+
+ sudo tar -xvzf ~/linux-6.0.0-rc1-local-g928a87efa423-x86.tar.gz -C /
+
+ Afterwards :ref:`generate the initramfs and add the kernel to your boot
+ loader's configuration <install_bisref>`; on some distributions the following
+ command will take care of both these tasks::
+
+ sudo /sbin/installkernel 6.0.0-rc1-local-g928a87efa423 /boot/vmlinuz-6.0.0-rc1-local-g928a87efa423
+
+ Now reboot and ensure you started the intended kernel.
+
+This approach even works when building for another architecture: just install
+cross-compilers and add the appropriate parameters to every invocation of make
+(e.g. ``make ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- [...]``).
+
+Additional reading material
+---------------------------
+
+* The `man page for 'git bisect' <https://git-scm.com/docs/git-bisect>`_ and
+ `fighting regressions with 'git bisect' <https://git-scm.com/docs/git-bisect-lk2009.html>`_
+ in the Git documentation.
+* `Working with git bisect <https://nathanchance.dev/posts/working-with-git-bisect/>`_
+ from kernel developer Nathan Chancellor.
+* `Using Git bisect to figure out when brokenness was introduced <http://webchick.net/node/99>`_.
+* `Fully automated bisecting with 'git bisect run' <https://lwn.net/Articles/317154>`_.
+
+..
+ end-of-content
+..
+ This document is maintained by Thorsten Leemhuis <linux@leemhuis.info>. If
+ you spot a typo or small mistake, feel free to let him know directly and
+ he'll fix it. You are free to do the same in a mostly informal way if you
+ want to contribute changes to the text -- but for copyright reasons please CC
+ linux-doc@vger.kernel.org and 'sign-off' your contribution as
+ Documentation/process/submitting-patches.rst explains in the section 'Sign
+ your work - the Developer's Certificate of Origin'.
+..
+ This text is available under GPL-2.0+ or CC-BY-4.0, as stated at the top
+ of the file. If you want to distribute this text under CC-BY-4.0 only,
+ please use 'The Linux kernel development community' for author attribution
+ and link this as source:
+ https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst
+
+..
+ Note: Only the content of this RST file as found in the Linux kernel sources
+ is available under CC-BY-4.0, as versions of this text that were processed
+ (for example by the kernel's build system) might contain content taken from
+ files which use a more restrictive license.
diff --git a/Documentation/admin-guide/workload-tracing.rst b/Documentation/admin-guide/workload-tracing.rst
index b2e254ec8ee8..35963491b9f1 100644
--- a/Documentation/admin-guide/workload-tracing.rst
+++ b/Documentation/admin-guide/workload-tracing.rst
@@ -82,8 +82,8 @@ Install tools to build Linux kernel and tools in kernel repository.
scripts/ver_linux is a good way to check if your system already has
the necessary tools::
- sudo apt-get build-essentials flex bison yacc
- sudo apt install libelf-dev systemtap-sdt-dev libaudit-dev libslang2-dev libperl-dev libdw-dev
+ sudo apt-get install build-essential flex bison yacc
+ sudo apt install libelf-dev systemtap-sdt-dev libslang2-dev libperl-dev libdw-dev
cscope is a good tool to browse kernel sources. Let's install it now::
@@ -196,11 +196,11 @@ Let’s checkout the latest Linux repository and build cscope database::
cscope -R -p10 # builds cscope.out database before starting browse session
cscope -d -p10 # starts browse session on cscope.out database
-Note: Run "cscope -R -p10" to build the database and c"scope -d -p10" to
-enter into the browsing session. cscope by default cscope.out database.
-To get out of this mode press ctrl+d. -p option is used to specify the
-number of file path components to display. -p10 is optimal for browsing
-kernel sources.
+Note: Run "cscope -R -p10" to build the database and "cscope -d -p10" to
+enter into the browsing session. cscope by default uses the cscope.out
+database. To get out of this mode press ctrl+d. -p option is used to
+specify the number of file path components to display. -p10 is optimal
+for browsing kernel sources.
What is perf and how do we use it?
==================================
diff --git a/Documentation/admin-guide/xfs.rst b/Documentation/admin-guide/xfs.rst
index b67772cf36d6..c85cd327af28 100644
--- a/Documentation/admin-guide/xfs.rst
+++ b/Documentation/admin-guide/xfs.rst
@@ -34,22 +34,6 @@ When mounting an XFS filesystem, the following options are accepted.
to the file. Specifying a fixed ``allocsize`` value turns off
the dynamic behaviour.
- attr2 or noattr2
- The options enable/disable an "opportunistic" improvement to
- be made in the way inline extended attributes are stored
- on-disk. When the new form is used for the first time when
- ``attr2`` is selected (either when setting or removing extended
- attributes) the on-disk superblock feature bit field will be
- updated to reflect this format being in use.
-
- The default behaviour is determined by the on-disk feature
- bit indicating that ``attr2`` behaviour is active. If either
- mount option is set, then that becomes the new default used
- by the filesystem.
-
- CRC enabled filesystems always use the ``attr2`` format, and so
- will reject the ``noattr2`` mount option if it is set.
-
discard or nodiscard (default)
Enable/disable the issuing of commands to let the block
device reclaim space freed by the filesystem. This is
@@ -75,12 +59,6 @@ When mounting an XFS filesystem, the following options are accepted.
across the entire filesystem rather than just on directories
configured to use it.
- ikeep or noikeep (default)
- When ``ikeep`` is specified, XFS does not delete empty inode
- clusters and keeps them around on disk. When ``noikeep`` is
- specified, empty inode clusters are returned to the free
- space pool.
-
inode32 or inode64 (default)
When ``inode32`` is specified, it indicates that XFS limits
inode creation to locations which will not result in inode
@@ -124,6 +102,14 @@ When mounting an XFS filesystem, the following options are accepted.
controls the size of each buffer and so is also relevant to
this case.
+ lifetime (default) or nolifetime
+ Enable data placement based on write life time hints provided
+ by the user. This turns on co-allocation of data of similar
+ life times when statistically favorable to reduce garbage
+ collection cost.
+
+ These options are only available for zoned rt file systems.
+
logbsize=value
Set the size of each in-memory log buffer. The size may be
specified in bytes, or in kilobytes with a "k" suffix.
@@ -143,6 +129,25 @@ When mounting an XFS filesystem, the following options are accepted.
optional, and the log section can be separate from the data
section or contained within it.
+ max_atomic_write=value
+ Set the maximum size of an atomic write. The size may be
+ specified in bytes, in kilobytes with a "k" suffix, in megabytes
+ with a "m" suffix, or in gigabytes with a "g" suffix. The size
+ cannot be larger than the maximum write size, larger than the
+ size of any allocation group, or larger than the size of a
+ remapping operation that the log can complete atomically.
+
+ The default value is to set the maximum I/O completion size
+ to allow each CPU to handle one at a time.
+
+ max_open_zones=value
+ Specify the max number of zones to keep open for writing on a
+ zoned rt device. Many open zones aids file data separation
+ but may impact performance on HDDs.
+
+ If ``max_open_zones`` is not specified, the value is determined
+ by the capabilities and the size of the zoned rt device.
+
noalign
Data allocations will not be aligned at stripe unit
boundaries. This is only relevant to filesystems created
@@ -226,9 +231,8 @@ latest version and try again.
The deprecation will take place in two parts. Support for mounting V4
filesystems can now be disabled at kernel build time via Kconfig option.
-The option will default to yes until September 2025, at which time it
-will be changed to default to no. In September 2030, support will be
-removed from the codebase entirely.
+These options were changed to default to no in September 2025. In
+September 2030, support will be removed from the codebase entirely.
Note: Distributors may choose to withdraw V4 format support earlier than
the dates listed above.
@@ -241,8 +245,6 @@ Deprecated Mount Options
============================ ================
Mounting with V4 filesystem September 2030
Mounting ascii-ci filesystem September 2030
-ikeep/noikeep September 2025
-attr2/noattr2 September 2025
============================ ================
@@ -258,6 +260,8 @@ Removed Mount Options
osyncisdsync/osyncisosync v4.0
barrier v4.19
nobarrier v4.19
+ ikeep/noikeep v6.18
+ attr2/noattr2 v6.18
=========================== =======
sysctls
@@ -285,9 +289,6 @@ The following sysctls are available for the XFS filesystem:
removes unused preallocation from clean inodes and releases
the unused space back to the free pool.
- fs.xfs.speculative_cow_prealloc_lifetime
- This is an alias for speculative_prealloc_lifetime.
-
fs.xfs.error_level (Min: 0 Default: 3 Max: 11)
A volume knob for error reporting when internal errors occur.
This will generate detailed messages & backtraces for filesystem
@@ -314,17 +315,6 @@ The following sysctls are available for the XFS filesystem:
This option is intended for debugging only.
- fs.xfs.irix_symlink_mode (Min: 0 Default: 0 Max: 1)
- Controls whether symlinks are created with mode 0777 (default)
- or whether their mode is affected by the umask (irix mode).
-
- fs.xfs.irix_sgid_inherit (Min: 0 Default: 0 Max: 1)
- Controls files created in SGID directories.
- If the group ID of the new file does not match the effective group
- ID or one of the supplementary group IDs of the parent dir, the
- ISGID bit is cleared if the irix_sgid_inherit compatibility sysctl
- is set.
-
fs.xfs.inherit_sync (Min: 0 Default: 1 Max: 1)
Setting this to "1" will cause the "sync" flag set
by the **xfs_io(8)** chattr command on a directory to be
@@ -360,24 +350,20 @@ The following sysctls are available for the XFS filesystem:
Deprecated Sysctls
==================
-=========================================== ================
- Name Removal Schedule
-=========================================== ================
-fs.xfs.irix_sgid_inherit September 2025
-fs.xfs.irix_symlink_mode September 2025
-fs.xfs.speculative_cow_prealloc_lifetime September 2025
-=========================================== ================
-
+None currently.
Removed Sysctls
===============
-============================= =======
- Name Removed
-============================= =======
- fs.xfs.xfsbufd_centisec v4.0
- fs.xfs.age_buffer_centisecs v4.0
-============================= =======
+========================================== =======
+ Name Removed
+========================================== =======
+ fs.xfs.xfsbufd_centisec v4.0
+ fs.xfs.age_buffer_centisecs v4.0
+ fs.xfs.irix_symlink_mode v6.18
+ fs.xfs.irix_sgid_inherit v6.18
+ fs.xfs.speculative_cow_prealloc_lifetime v6.18
+========================================== =======
Error handling
==============
@@ -542,3 +528,24 @@ The interesting knobs for XFS workqueues are as follows:
nice Relative priority of scheduling the threads. These are the
same nice levels that can be applied to userspace processes.
============ ===========
+
+Zoned Filesystems
+=================
+
+For zoned file systems, the following attributes are exposed in:
+
+ /sys/fs/xfs/<dev>/zoned/
+
+ max_open_zones (Min: 1 Default: Varies Max: UINTMAX)
+ This read-only attribute exposes the maximum number of open zones
+ available for data placement. The value is determined at mount time and
+ is limited by the capabilities of the backing zoned device, file system
+ size and the max_open_zones mount option.
+
+ zonegc_low_space (Min: 0 Default: 0 Max: 100)
+ Define a percentage for how much of the unused space that GC should keep
+ available for writing. A high value will reclaim more of the space
+ occupied by unused blocks, creating a larger buffer against write
+ bursts at the cost of increased write amplification. Regardless
+ of this value, garbage collection will always aim to free a minimum
+ amount of blocks to keep max_open_zones open for data placement purposes.
diff --git a/Documentation/arch/arm/mem_alignment.rst b/Documentation/arch/arm/mem_alignment.rst
index aa22893b62bc..64bd77959300 100644
--- a/Documentation/arch/arm/mem_alignment.rst
+++ b/Documentation/arch/arm/mem_alignment.rst
@@ -12,7 +12,7 @@ ones.
Of course this is a bad idea to rely on the alignment trap to perform
unaligned memory access in general. If those access are predictable, you
-are better to use the macros provided by include/asm/unaligned.h. The
+are better to use the macros provided by include/linux/unaligned.h. The
alignment trap can fixup misaligned access for the exception cases, but at
a high performance cost. It better be rare.
diff --git a/Documentation/arch/arm/stm32/stm32-dma-mdma-chaining.rst b/Documentation/arch/arm/stm32/stm32-dma-mdma-chaining.rst
index 2945e0e33104..301aa30890ae 100644
--- a/Documentation/arch/arm/stm32/stm32-dma-mdma-chaining.rst
+++ b/Documentation/arch/arm/stm32/stm32-dma-mdma-chaining.rst
@@ -359,7 +359,7 @@ Driver updates for STM32 DMA-MDMA chaining support in foo driver
descriptor you want a callback to be called at the end of the transfer
(dmaengine_prep_slave_sg()) or the period (dmaengine_prep_dma_cyclic()).
Depending on the direction, set the callback on the descriptor that finishes
- the overal transfer:
+ the overall transfer:
* DMA_DEV_TO_MEM: set the callback on the "MDMA" descriptor
* DMA_MEM_TO_DEV: set the callback on the "DMA" descriptor
@@ -371,7 +371,7 @@ Driver updates for STM32 DMA-MDMA chaining support in foo driver
As STM32 MDMA channel transfer is triggered by STM32 DMA, you must issue
STM32 MDMA channel before STM32 DMA channel.
- If any, your callback will be called to warn you about the end of the overal
+ If any, your callback will be called to warn you about the end of the overall
transfer or the period completion.
Don't forget to terminate both channels. STM32 DMA channel is configured in
diff --git a/Documentation/arch/arm/stm32/stm32f746-overview.rst b/Documentation/arch/arm/stm32/stm32f746-overview.rst
index 78befddc7740..335f0855a858 100644
--- a/Documentation/arch/arm/stm32/stm32f746-overview.rst
+++ b/Documentation/arch/arm/stm32/stm32f746-overview.rst
@@ -15,7 +15,7 @@ It features:
- SD/MMC/SDIO support
- Ethernet controller
- USB OTFG FS & HS controllers
-- I2C, SPI, CAN busses support
+- I2C, SPI, CAN buses support
- Several 16 & 32 bits general purpose timers
- Serial Audio interface
- LCD controller
diff --git a/Documentation/arch/arm/stm32/stm32f769-overview.rst b/Documentation/arch/arm/stm32/stm32f769-overview.rst
index e482980ddf21..ef31aadee68f 100644
--- a/Documentation/arch/arm/stm32/stm32f769-overview.rst
+++ b/Documentation/arch/arm/stm32/stm32f769-overview.rst
@@ -15,7 +15,7 @@ It features:
- SD/MMC/SDIO support*2
- Ethernet controller
- USB OTFG FS & HS controllers
-- I2C*4, SPI*6, CAN*3 busses support
+- I2C*4, SPI*6, CAN*3 buses support
- Several 16 & 32 bits general purpose timers
- Serial Audio interface*2
- LCD controller
diff --git a/Documentation/arch/arm/stm32/stm32h743-overview.rst b/Documentation/arch/arm/stm32/stm32h743-overview.rst
index 4e15f1a42730..7659df24d362 100644
--- a/Documentation/arch/arm/stm32/stm32h743-overview.rst
+++ b/Documentation/arch/arm/stm32/stm32h743-overview.rst
@@ -15,7 +15,7 @@ It features:
- SD/MMC/SDIO support
- Ethernet controller
- USB OTFG FS & HS controllers
-- I2C, SPI, CAN busses support
+- I2C, SPI, CAN buses support
- Several 16 & 32 bits general purpose timers
- Serial Audio interface
- LCD controller
diff --git a/Documentation/arch/arm/stm32/stm32h750-overview.rst b/Documentation/arch/arm/stm32/stm32h750-overview.rst
index 0e51235c9547..be032b77d1f1 100644
--- a/Documentation/arch/arm/stm32/stm32h750-overview.rst
+++ b/Documentation/arch/arm/stm32/stm32h750-overview.rst
@@ -15,7 +15,7 @@ It features:
- SD/MMC/SDIO support
- Ethernet controller
- USB OTFG FS & HS controllers
-- I2C, SPI, CAN busses support
+- I2C, SPI, CAN buses support
- Several 16 & 32 bits general purpose timers
- Serial Audio interface
- LCD controller
diff --git a/Documentation/arch/arm/stm32/stm32mp13-overview.rst b/Documentation/arch/arm/stm32/stm32mp13-overview.rst
index 3bb9492dad49..b5e9589fb06f 100644
--- a/Documentation/arch/arm/stm32/stm32mp13-overview.rst
+++ b/Documentation/arch/arm/stm32/stm32mp13-overview.rst
@@ -24,7 +24,7 @@ More details:
- ADC/DAC
- USB EHCI/OHCI controllers
- USB OTG
-- I2C, SPI, CAN busses support
+- I2C, SPI, CAN buses support
- Several general purpose timers
- Serial Audio interface
- LCD controller
diff --git a/Documentation/arch/arm/stm32/stm32mp151-overview.rst b/Documentation/arch/arm/stm32/stm32mp151-overview.rst
index f42a2ac309c0..b58c256ede9a 100644
--- a/Documentation/arch/arm/stm32/stm32mp151-overview.rst
+++ b/Documentation/arch/arm/stm32/stm32mp151-overview.rst
@@ -23,7 +23,7 @@ More details:
- ADC/DAC
- USB EHCI/OHCI controllers
- USB OTG
-- I2C, SPI busses support
+- I2C, SPI buses support
- Several general purpose timers
- Serial Audio interface
- LCD-TFT controller
diff --git a/Documentation/arch/arm64/amu.rst b/Documentation/arch/arm64/amu.rst
index 01f2de2b0450..ac1b3f0e211d 100644
--- a/Documentation/arch/arm64/amu.rst
+++ b/Documentation/arch/arm64/amu.rst
@@ -80,7 +80,7 @@ bypass the setting of AMUSERENR_EL0 to trap accesses from EL0 (userspace) to
EL1 (kernel). Therefore, firmware should still ensure accesses to AMU registers
are not trapped in EL2/EL3.
-The fixed counters of AMUv1 are accessible though the following system
+The fixed counters of AMUv1 are accessible through the following system
register definitions:
- SYS_AMEVCNTR0_CORE_EL0
diff --git a/Documentation/arch/arm64/arm-cca.rst b/Documentation/arch/arm64/arm-cca.rst
new file mode 100644
index 000000000000..c48b7d4ab6bd
--- /dev/null
+++ b/Documentation/arch/arm64/arm-cca.rst
@@ -0,0 +1,69 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================================
+Arm Confidential Compute Architecture
+=====================================
+
+Arm systems that support the Realm Management Extension (RME) contain
+hardware to allow a VM guest to be run in a way which protects the code
+and data of the guest from the hypervisor. It extends the older "two
+world" model (Normal and Secure World) into four worlds: Normal, Secure,
+Root and Realm. Linux can then also be run as a guest to a monitor
+running in the Realm world.
+
+The monitor running in the Realm world is known as the Realm Management
+Monitor (RMM) and implements the Realm Management Monitor
+specification[1]. The monitor acts a bit like a hypervisor (e.g. it runs
+in EL2 and manages the stage 2 page tables etc of the guests running in
+Realm world), however much of the control is handled by a hypervisor
+running in the Normal World. The Normal World hypervisor uses the Realm
+Management Interface (RMI) defined by the RMM specification to request
+the RMM to perform operations (e.g. mapping memory or executing a vCPU).
+
+The RMM defines an environment for guests where the address space (IPA)
+is split into two. The lower half is protected - any memory that is
+mapped in this half cannot be seen by the Normal World and the RMM
+restricts what operations the Normal World can perform on this memory
+(e.g. the Normal World cannot replace pages in this region without the
+guest's cooperation). The upper half is shared, the Normal World is free
+to make changes to the pages in this region, and is able to emulate MMIO
+devices in this region too.
+
+A guest running in a Realm may also communicate with the RMM using the
+Realm Services Interface (RSI) to request changes in its environment or
+to perform attestation about its environment. In particular it may
+request that areas of the protected address space are transitioned
+between 'RAM' and 'EMPTY' (in either direction). This allows a Realm
+guest to give up memory to be returned to the Normal World, or to
+request new memory from the Normal World. Without an explicit request
+from the Realm guest the RMM will otherwise prevent the Normal World
+from making these changes.
+
+Linux as a Realm Guest
+----------------------
+
+To run Linux as a guest within a Realm, the following must be provided
+either by the VMM or by a `boot loader` run in the Realm before Linux:
+
+ * All protected RAM described to Linux (by DT or ACPI) must be marked
+ RIPAS RAM before handing control over to Linux.
+
+ * MMIO devices must be either unprotected (e.g. emulated by the Normal
+ World) or marked RIPAS DEV.
+
+ * MMIO devices emulated by the Normal World and used very early in boot
+ (specifically earlycon) must be specified in the upper half of IPA.
+ For earlycon this can be done by specifying the address on the
+ command line, e.g. with an IPA size of 33 bits and the base address
+ of the emulated UART at 0x1000000: ``earlycon=uart,mmio,0x101000000``
+
+ * Linux will use bounce buffers for communicating with unprotected
+ devices. It will transition some protected memory to RIPAS EMPTY and
+ expect to be able to access unprotected pages at the same IPA address
+ but with the highest valid IPA bit set. The expectation is that the
+ VMM will remove the physical pages from the protected mapping and
+ provide those pages as unprotected pages.
+
+References
+----------
+[1] https://developer.arm.com/documentation/den0137/
diff --git a/Documentation/arch/arm64/asymmetric-32bit.rst b/Documentation/arch/arm64/asymmetric-32bit.rst
index 64a0b505da7d..57b8d7476f71 100644
--- a/Documentation/arch/arm64/asymmetric-32bit.rst
+++ b/Documentation/arch/arm64/asymmetric-32bit.rst
@@ -55,7 +55,7 @@ sysfs
The subset of CPUs capable of running 32-bit tasks is described in
``/sys/devices/system/cpu/aarch32_el0`` and is documented further in
-``Documentation/ABI/testing/sysfs-devices-system-cpu``.
+Documentation/ABI/testing/sysfs-devices-system-cpu.
**Note:** CPUs are advertised by this file as they are detected and so
late-onlining of 32-bit-capable CPUs can result in the file contents
@@ -153,3 +153,11 @@ asymmetric system, a broken guest at EL1 could still attempt to execute
mode will return to host userspace with an ``exit_reason`` of
``KVM_EXIT_FAIL_ENTRY`` and will remain non-runnable until successfully
re-initialised by a subsequent ``KVM_ARM_VCPU_INIT`` operation.
+
+NOHZ FULL
+---------
+
+To avoid perturbing an adaptive-ticks CPU (specified using
+``nohz_full=``) when a 32-bit task is forcefully migrated, these CPUs
+are treated as 64-bit-only when support for asymmetric 32-bit systems
+is enabled.
diff --git a/Documentation/arch/arm64/booting.rst b/Documentation/arch/arm64/booting.rst
index b57776a68f15..26efca09aef3 100644
--- a/Documentation/arch/arm64/booting.rst
+++ b/Documentation/arch/arm64/booting.rst
@@ -41,6 +41,9 @@ to automatically locate and size all RAM, or it may use knowledge of
the RAM in the machine, or any other method the boot loader designer
sees fit.)
+For Arm Confidential Compute Realms this includes ensuring that all
+protected RAM has a Realm IPA state (RIPAS) of "RAM".
+
2. Setup the device tree
-------------------------
@@ -220,6 +223,47 @@ Before jumping into the kernel, the following conditions must be met:
- SCR_EL3.HCE (bit 8) must be initialised to 0b1.
+ For systems with a GICv5 interrupt controller to be used in v5 mode:
+
+ - If the kernel is entered at EL1 and EL2 is present:
+
+ - ICH_HFGRTR_EL2.ICC_PPI_ACTIVERn_EL1 (bit 20) must be initialised to 0b1.
+ - ICH_HFGRTR_EL2.ICC_PPI_PRIORITYRn_EL1 (bit 19) must be initialised to 0b1.
+ - ICH_HFGRTR_EL2.ICC_PPI_PENDRn_EL1 (bit 18) must be initialised to 0b1.
+ - ICH_HFGRTR_EL2.ICC_PPI_ENABLERn_EL1 (bit 17) must be initialised to 0b1.
+ - ICH_HFGRTR_EL2.ICC_PPI_HMRn_EL1 (bit 16) must be initialised to 0b1.
+ - ICH_HFGRTR_EL2.ICC_IAFFIDR_EL1 (bit 7) must be initialised to 0b1.
+ - ICH_HFGRTR_EL2.ICC_ICSR_EL1 (bit 6) must be initialised to 0b1.
+ - ICH_HFGRTR_EL2.ICC_PCR_EL1 (bit 5) must be initialised to 0b1.
+ - ICH_HFGRTR_EL2.ICC_HPPIR_EL1 (bit 4) must be initialised to 0b1.
+ - ICH_HFGRTR_EL2.ICC_HAPR_EL1 (bit 3) must be initialised to 0b1.
+ - ICH_HFGRTR_EL2.ICC_CR0_EL1 (bit 2) must be initialised to 0b1.
+ - ICH_HFGRTR_EL2.ICC_IDRn_EL1 (bit 1) must be initialised to 0b1.
+ - ICH_HFGRTR_EL2.ICC_APR_EL1 (bit 0) must be initialised to 0b1.
+
+ - ICH_HFGWTR_EL2.ICC_PPI_ACTIVERn_EL1 (bit 20) must be initialised to 0b1.
+ - ICH_HFGWTR_EL2.ICC_PPI_PRIORITYRn_EL1 (bit 19) must be initialised to 0b1.
+ - ICH_HFGWTR_EL2.ICC_PPI_PENDRn_EL1 (bit 18) must be initialised to 0b1.
+ - ICH_HFGWTR_EL2.ICC_PPI_ENABLERn_EL1 (bit 17) must be initialised to 0b1.
+ - ICH_HFGWTR_EL2.ICC_ICSR_EL1 (bit 6) must be initialised to 0b1.
+ - ICH_HFGWTR_EL2.ICC_PCR_EL1 (bit 5) must be initialised to 0b1.
+ - ICH_HFGWTR_EL2.ICC_CR0_EL1 (bit 2) must be initialised to 0b1.
+ - ICH_HFGWTR_EL2.ICC_APR_EL1 (bit 0) must be initialised to 0b1.
+
+ - ICH_HFGITR_EL2.GICRCDNMIA (bit 10) must be initialised to 0b1.
+ - ICH_HFGITR_EL2.GICRCDIA (bit 9) must be initialised to 0b1.
+ - ICH_HFGITR_EL2.GICCDDI (bit 8) must be initialised to 0b1.
+ - ICH_HFGITR_EL2.GICCDEOI (bit 7) must be initialised to 0b1.
+ - ICH_HFGITR_EL2.GICCDHM (bit 6) must be initialised to 0b1.
+ - ICH_HFGITR_EL2.GICCDRCFG (bit 5) must be initialised to 0b1.
+ - ICH_HFGITR_EL2.GICCDPEND (bit 4) must be initialised to 0b1.
+ - ICH_HFGITR_EL2.GICCDAFF (bit 3) must be initialised to 0b1.
+ - ICH_HFGITR_EL2.GICCDPRI (bit 2) must be initialised to 0b1.
+ - ICH_HFGITR_EL2.GICCDDIS (bit 1) must be initialised to 0b1.
+ - ICH_HFGITR_EL2.GICCDEN (bit 0) must be initialised to 0b1.
+
+ - The DT or ACPI tables must describe a GICv5 interrupt controller.
+
For systems with a GICv3 interrupt controller to be used in v3 mode:
- If EL3 is present:
@@ -231,7 +275,7 @@ Before jumping into the kernel, the following conditions must be met:
- If the kernel is entered at EL1:
- - ICC.SRE_EL2.Enable (bit 3) must be initialised to 0b1
+ - ICC_SRE_EL2.Enable (bit 3) must be initialised to 0b1
- ICC_SRE_EL2.SRE (bit 0) must be initialised to 0b1.
- The DT or ACPI tables must describe a GICv3 interrupt controller.
@@ -285,6 +329,12 @@ Before jumping into the kernel, the following conditions must be met:
- SCR_EL3.FGTEn (bit 27) must be initialised to 0b1.
+ For CPUs with the Fine Grained Traps 2 (FEAT_FGT2) extension present:
+
+ - If EL3 is present and the kernel is entered at EL2:
+
+ - SCR_EL3.FGTEn2 (bit 59) must be initialised to 0b1.
+
For CPUs with support for HCRX_EL2 (FEAT_HCX) present:
- If EL3 is present and the kernel is entered at EL2:
@@ -341,13 +391,13 @@ Before jumping into the kernel, the following conditions must be met:
- SMCR_EL2.LEN must be initialised to the same value for all CPUs the
kernel will execute on.
- - HWFGRTR_EL2.nTPIDR2_EL0 (bit 55) must be initialised to 0b01.
+ - HFGRTR_EL2.nTPIDR2_EL0 (bit 55) must be initialised to 0b01.
- - HWFGWTR_EL2.nTPIDR2_EL0 (bit 55) must be initialised to 0b01.
+ - HFGWTR_EL2.nTPIDR2_EL0 (bit 55) must be initialised to 0b01.
- - HWFGRTR_EL2.nSMPRI_EL1 (bit 54) must be initialised to 0b01.
+ - HFGRTR_EL2.nSMPRI_EL1 (bit 54) must be initialised to 0b01.
- - HWFGWTR_EL2.nSMPRI_EL1 (bit 54) must be initialised to 0b01.
+ - HFGWTR_EL2.nSMPRI_EL1 (bit 54) must be initialised to 0b01.
For CPUs with the Scalable Matrix Extension FA64 feature (FEAT_SME_FA64):
@@ -379,12 +429,63 @@ Before jumping into the kernel, the following conditions must be met:
- SMCR_EL2.EZT0 (bit 30) must be initialised to 0b1.
+ For CPUs with the Branch Record Buffer Extension (FEAT_BRBE):
+
+ - If EL3 is present:
+
+ - MDCR_EL3.SBRBE (bits 33:32) must be initialised to 0b01 or 0b11.
+
+ - If the kernel is entered at EL1 and EL2 is present:
+
+ - BRBCR_EL2.CC (bit 3) must be initialised to 0b1.
+ - BRBCR_EL2.MPRED (bit 4) must be initialised to 0b1.
+
+ - HDFGRTR_EL2.nBRBDATA (bit 61) must be initialised to 0b1.
+ - HDFGRTR_EL2.nBRBCTL (bit 60) must be initialised to 0b1.
+ - HDFGRTR_EL2.nBRBIDR (bit 59) must be initialised to 0b1.
+
+ - HDFGWTR_EL2.nBRBDATA (bit 61) must be initialised to 0b1.
+ - HDFGWTR_EL2.nBRBCTL (bit 60) must be initialised to 0b1.
+
+ - HFGITR_EL2.nBRBIALL (bit 56) must be initialised to 0b1.
+ - HFGITR_EL2.nBRBINJ (bit 55) must be initialised to 0b1.
+
+ For CPUs with the Performance Monitors Extension (FEAT_PMUv3p9):
+
+ - If EL3 is present:
+
+ - MDCR_EL3.EnPM2 (bit 7) must be initialised to 0b1.
+
+ - If the kernel is entered at EL1 and EL2 is present:
+
+ - HDFGRTR2_EL2.nPMICNTR_EL0 (bit 2) must be initialised to 0b1.
+ - HDFGRTR2_EL2.nPMICFILTR_EL0 (bit 3) must be initialised to 0b1.
+ - HDFGRTR2_EL2.nPMUACR_EL1 (bit 4) must be initialised to 0b1.
+
+ - HDFGWTR2_EL2.nPMICNTR_EL0 (bit 2) must be initialised to 0b1.
+ - HDFGWTR2_EL2.nPMICFILTR_EL0 (bit 3) must be initialised to 0b1.
+ - HDFGWTR2_EL2.nPMUACR_EL1 (bit 4) must be initialised to 0b1.
+
+ For CPUs with SPE data source filtering (FEAT_SPE_FDS):
+
+ - If EL3 is present:
+
+ - MDCR_EL3.EnPMS3 (bit 42) must be initialised to 0b1.
+
+ - If the kernel is entered at EL1 and EL2 is present:
+
+ - HDFGRTR2_EL2.nPMSDSFR_EL1 (bit 19) must be initialised to 0b1.
+ - HDFGWTR2_EL2.nPMSDSFR_EL1 (bit 19) must be initialised to 0b1.
+
For CPUs with Memory Copy and Memory Set instructions (FEAT_MOPS):
- If the kernel is entered at EL1 and EL2 is present:
- HCRX_EL2.MSCEn (bit 11) must be initialised to 0b1.
+ - HCRX_EL2.MCE2 (bit 10) must be initialised to 0b1 and the hypervisor
+ must handle MOPS exceptions as described in :ref:`arm64_mops_hyp`.
+
For CPUs with the Extended Translation Control Register feature (FEAT_TCR2):
- If EL3 is present:
@@ -411,6 +512,50 @@ Before jumping into the kernel, the following conditions must be met:
- HFGRWR_EL2.nPIRE0_EL1 (bit 57) must be initialised to 0b1.
+ - For CPUs with Guarded Control Stacks (FEAT_GCS):
+
+ - GCSCR_EL1 must be initialised to 0.
+
+ - GCSCRE0_EL1 must be initialised to 0.
+
+ - If EL3 is present:
+
+ - SCR_EL3.GCSEn (bit 39) must be initialised to 0b1.
+
+ - If EL2 is present:
+
+ - GCSCR_EL2 must be initialised to 0.
+
+ - If the kernel is entered at EL1 and EL2 is present:
+
+ - HCRX_EL2.GCSEn must be initialised to 0b1.
+
+ - HFGITR_EL2.nGCSEPP (bit 59) must be initialised to 0b1.
+
+ - HFGITR_EL2.nGCSSTR_EL1 (bit 58) must be initialised to 0b1.
+
+ - HFGITR_EL2.nGCSPUSHM_EL1 (bit 57) must be initialised to 0b1.
+
+ - HFGRTR_EL2.nGCS_EL1 (bit 53) must be initialised to 0b1.
+
+ - HFGRTR_EL2.nGCS_EL0 (bit 52) must be initialised to 0b1.
+
+ - HFGWTR_EL2.nGCS_EL1 (bit 53) must be initialised to 0b1.
+
+ - HFGWTR_EL2.nGCS_EL0 (bit 52) must be initialised to 0b1.
+
+ - For CPUs with debug architecture i.e FEAT_Debugv8pN (all versions):
+
+ - If EL3 is present:
+
+ - MDCR_EL3.TDA (bit 9) must be initialized to 0b0
+
+ - For CPUs with FEAT_PMUv3:
+
+ - If EL3 is present:
+
+ - MDCR_EL3.TPM (bit 6) must be initialized to 0b0
+
The requirements described above for CPU mode, caches, MMUs, architected
timers, coherency and system registers apply to all CPUs. All CPUs must
enter the kernel in the same exception level. Where the values documented
diff --git a/Documentation/arch/arm64/cpu-feature-registers.rst b/Documentation/arch/arm64/cpu-feature-registers.rst
index 44f9bd78539d..add66afc7b03 100644
--- a/Documentation/arch/arm64/cpu-feature-registers.rst
+++ b/Documentation/arch/arm64/cpu-feature-registers.rst
@@ -72,14 +72,15 @@ there are some issues with their usage.
process could be migrated to another CPU by the time it uses the
register value, unless the CPU affinity is set. Hence, there is no
guarantee that the value reflects the processor that it is
- currently executing on. The REVIDR is not exposed due to this
- constraint, as REVIDR makes sense only in conjunction with the
- MIDR. Alternately, MIDR_EL1 and REVIDR_EL1 are exposed via sysfs
- at::
+ currently executing on. REVIDR and AIDR are not exposed due to this
+ constraint, as these registers only make sense in conjunction with
+ the MIDR. Alternately, MIDR_EL1, REVIDR_EL1, and AIDR_EL1 are exposed
+ via sysfs at::
/sys/devices/system/cpu/cpu$ID/regs/identification/
- \- midr
- \- revidr
+ \- midr_el1
+ \- revidr_el1
+ \- aidr_el1
3. Implementation
--------------------
@@ -152,6 +153,8 @@ infrastructure:
+------------------------------+---------+---------+
| DIT | [51-48] | y |
+------------------------------+---------+---------+
+ | MPAM | [43-40] | n |
+ +------------------------------+---------+---------+
| SVE | [35-32] | y |
+------------------------------+---------+---------+
| GIC | [27-24] | n |
diff --git a/Documentation/arch/arm64/cpu-hotplug.rst b/Documentation/arch/arm64/cpu-hotplug.rst
new file mode 100644
index 000000000000..8fb438bf7781
--- /dev/null
+++ b/Documentation/arch/arm64/cpu-hotplug.rst
@@ -0,0 +1,79 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. _cpuhp_index:
+
+====================
+CPU Hotplug and ACPI
+====================
+
+CPU hotplug in the arm64 world is commonly used to describe the kernel taking
+CPUs online/offline using PSCI. This document is about ACPI firmware allowing
+CPUs that were not available during boot to be added to the system later.
+
+``possible`` and ``present`` refer to the state of the CPU as seen by linux.
+
+
+CPU Hotplug on physical systems - CPUs not present at boot
+----------------------------------------------------------
+
+Physical systems need to mark a CPU that is ``possible`` but not ``present`` as
+being ``present``. An example would be a dual socket machine, where the package
+in one of the sockets can be replaced while the system is running.
+
+This is not supported.
+
+In the arm64 world CPUs are not a single device but a slice of the system.
+There are no systems that support the physical addition (or removal) of CPUs
+while the system is running, and ACPI is not able to sufficiently describe
+them.
+
+e.g. New CPUs come with new caches, but the platform's cache topology is
+described in a static table, the PPTT. How caches are shared between CPUs is
+not discoverable, and must be described by firmware.
+
+e.g. The GIC redistributor for each CPU must be accessed by the driver during
+boot to discover the system wide supported features. ACPI's MADT GICC
+structures can describe a redistributor associated with a disabled CPU, but
+can't describe whether the redistributor is accessible, only that it is not
+'always on'.
+
+arm64's ACPI tables assume that everything described is ``present``.
+
+
+CPU Hotplug on virtual systems - CPUs not enabled at boot
+---------------------------------------------------------
+
+Virtual systems have the advantage that all the properties the system will
+ever have can be described at boot. There are no power-domain considerations
+as such devices are emulated.
+
+CPU Hotplug on virtual systems is supported. It is distinct from physical
+CPU Hotplug as all resources are described as ``present``, but CPUs may be
+marked as disabled by firmware. Only the CPU's online/offline behaviour is
+influenced by firmware. An example is where a virtual machine boots with a
+single CPU, and additional CPUs are added once a cloud orchestrator deploys
+the workload.
+
+For a virtual machine, the VMM (e.g. Qemu) plays the part of firmware.
+
+Virtual hotplug is implemented as a firmware policy affecting which CPUs can be
+brought online. Firmware can enforce its policy via PSCI's return codes. e.g.
+``DENIED``.
+
+The ACPI tables must describe all the resources of the virtual machine. CPUs
+that firmware wishes to disable either from boot (or later) should not be
+``enabled`` in the MADT GICC structures, but should have the ``online capable``
+bit set, to indicate they can be enabled later. The boot CPU must be marked as
+``enabled``. The 'always on' GICR structure must be used to describe the
+redistributors.
+
+CPUs described as ``online capable`` but not ``enabled`` can be set to enabled
+by the DSDT's Processor object's _STA method. On virtual systems the _STA method
+must always report the CPU as ``present``. Changes to the firmware policy can
+be notified to the OS via device-check or eject-request.
+
+CPUs described as ``enabled`` in the static table, should not have their _STA
+modified dynamically by firmware. Soft-restart features such as kexec will
+re-read the static properties of the system from these static tables, and
+may malfunction if these no longer describe the running system. Linux will
+re-discover the dynamic properties of the system from the _STA method later
+during boot.
diff --git a/Documentation/arch/arm64/elf_hwcaps.rst b/Documentation/arch/arm64/elf_hwcaps.rst
index ced7b335e2e0..a15df4956849 100644
--- a/Documentation/arch/arm64/elf_hwcaps.rst
+++ b/Documentation/arch/arm64/elf_hwcaps.rst
@@ -16,9 +16,9 @@ architected discovery mechanism available to userspace code at EL0. The
kernel exposes the presence of these features to userspace through a set
of flags called hwcaps, exposed in the auxiliary vector.
-Userspace software can test for features by acquiring the AT_HWCAP or
-AT_HWCAP2 entry of the auxiliary vector, and testing whether the relevant
-flags are set, e.g.::
+Userspace software can test for features by acquiring the AT_HWCAP,
+AT_HWCAP2 or AT_HWCAP3 entry of the auxiliary vector, and testing
+whether the relevant flags are set, e.g.::
bool floating_point_is_present(void)
{
@@ -170,26 +170,86 @@ HWCAP_PACG
ID_AA64ISAR1_EL1.GPI == 0b0001, as described by
Documentation/arch/arm64/pointer-authentication.rst.
+HWCAP_GCS
+ Functionality implied by ID_AA64PFR1_EL1.GCS == 0b1, as
+ described by Documentation/arch/arm64/gcs.rst.
+
+HWCAP_CMPBR
+ Functionality implied by ID_AA64ISAR2_EL1.CSSC == 0b0010.
+
+HWCAP_FPRCVT
+ Functionality implied by ID_AA64ISAR3_EL1.FPRCVT == 0b0001.
+
+HWCAP_F8MM8
+ Functionality implied by ID_AA64FPFR0_EL1.F8MM8 == 0b0001.
+
+HWCAP_F8MM4
+ Functionality implied by ID_AA64FPFR0_EL1.F8MM4 == 0b0001.
+
+HWCAP_SVE_F16MM
+ Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
+ ID_AA64ZFR0_EL1.F16MM == 0b0001.
+
+HWCAP_SVE_ELTPERM
+ Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
+ ID_AA64ZFR0_EL1.ELTPERM == 0b0001.
+
+HWCAP_SVE_AES2
+ Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
+ ID_AA64ZFR0_EL1.AES == 0b0011.
+
+HWCAP_SVE_BFSCALE
+ Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
+ ID_AA64ZFR0_EL1.B16B16 == 0b0010.
+
+HWCAP_SVE2P2
+ Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
+ ID_AA64ZFR0_EL1.SVEver == 0b0011.
+
+HWCAP_SME2P2
+ Functionality implied by ID_AA64SMFR0_EL1.SMEver == 0b0011.
+
+HWCAP_SME_SBITPERM
+ Functionality implied by ID_AA64SMFR0_EL1.SBitPerm == 0b1.
+
+HWCAP_SME_AES
+ Functionality implied by ID_AA64SMFR0_EL1.AES == 0b1.
+
+HWCAP_SME_SFEXPA
+ Functionality implied by ID_AA64SMFR0_EL1.SFEXPA == 0b1.
+
+HWCAP_SME_STMOP
+ Functionality implied by ID_AA64SMFR0_EL1.STMOP == 0b1.
+
+HWCAP_SME_SMOP4
+ Functionality implied by ID_AA64SMFR0_EL1.SMOP4 == 0b1.
+
HWCAP2_DCPODP
Functionality implied by ID_AA64ISAR1_EL1.DPB == 0b0010.
HWCAP2_SVE2
- Functionality implied by ID_AA64ZFR0_EL1.SVEver == 0b0001.
+ Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
+ ID_AA64ZFR0_EL1.SVEver == 0b0001.
HWCAP2_SVEAES
- Functionality implied by ID_AA64ZFR0_EL1.AES == 0b0001.
+ Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
+ ID_AA64ZFR0_EL1.AES == 0b0001.
HWCAP2_SVEPMULL
- Functionality implied by ID_AA64ZFR0_EL1.AES == 0b0010.
+ Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
+ ID_AA64ZFR0_EL1.AES == 0b0010.
HWCAP2_SVEBITPERM
- Functionality implied by ID_AA64ZFR0_EL1.BitPerm == 0b0001.
+ Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
+ ID_AA64ZFR0_EL1.BitPerm == 0b0001.
HWCAP2_SVESHA3
- Functionality implied by ID_AA64ZFR0_EL1.SHA3 == 0b0001.
+ Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
+ ID_AA64ZFR0_EL1.SHA3 == 0b0001.
HWCAP2_SVESM4
- Functionality implied by ID_AA64ZFR0_EL1.SM4 == 0b0001.
+ Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
+ ID_AA64ZFR0_EL1.SM4 == 0b0001.
HWCAP2_FLAGM2
Functionality implied by ID_AA64ISAR0_EL1.TS == 0b0010.
@@ -198,16 +258,20 @@ HWCAP2_FRINT
Functionality implied by ID_AA64ISAR1_EL1.FRINTTS == 0b0001.
HWCAP2_SVEI8MM
- Functionality implied by ID_AA64ZFR0_EL1.I8MM == 0b0001.
+ Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
+ ID_AA64ZFR0_EL1.I8MM == 0b0001.
HWCAP2_SVEF32MM
- Functionality implied by ID_AA64ZFR0_EL1.F32MM == 0b0001.
+ Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
+ ID_AA64ZFR0_EL1.F32MM == 0b0001.
HWCAP2_SVEF64MM
- Functionality implied by ID_AA64ZFR0_EL1.F64MM == 0b0001.
+ Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
+ ID_AA64ZFR0_EL1.F64MM == 0b0001.
HWCAP2_SVEBF16
- Functionality implied by ID_AA64ZFR0_EL1.BF16 == 0b0001.
+ Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
+ ID_AA64ZFR0_EL1.BF16 == 0b0001.
HWCAP2_I8MM
Functionality implied by ID_AA64ISAR1_EL1.I8MM == 0b0001.
@@ -273,7 +337,8 @@ HWCAP2_EBF16
Functionality implied by ID_AA64ISAR1_EL1.BF16 == 0b0010.
HWCAP2_SVE_EBF16
- Functionality implied by ID_AA64ZFR0_EL1.BF16 == 0b0010.
+ Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
+ ID_AA64ZFR0_EL1.BF16 == 0b0010.
HWCAP2_CSSC
Functionality implied by ID_AA64ISAR2_EL1.CSSC == 0b0001.
@@ -282,7 +347,8 @@ HWCAP2_RPRFM
Functionality implied by ID_AA64ISAR2_EL1.RPRFM == 0b0001.
HWCAP2_SVE2P1
- Functionality implied by ID_AA64ZFR0_EL1.SVEver == 0b0010.
+ Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
+ ID_AA64ZFR0_EL1.SVEver == 0b0010.
HWCAP2_SME2
Functionality implied by ID_AA64SMFR0_EL1.SMEver == 0b0001.
@@ -309,7 +375,8 @@ HWCAP2_HBC
Functionality implied by ID_AA64ISAR2_EL1.BC == 0b0001.
HWCAP2_SVE_B16B16
- Functionality implied by ID_AA64ZFR0_EL1.B16B16 == 0b0001.
+ Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
+ ID_AA64ZFR0_EL1.B16B16 == 0b0001.
HWCAP2_LRCPC3
Functionality implied by ID_AA64ISAR1_EL1.LRCPC == 0b0011.
@@ -317,6 +384,67 @@ HWCAP2_LRCPC3
HWCAP2_LSE128
Functionality implied by ID_AA64ISAR0_EL1.Atomic == 0b0011.
+HWCAP2_FPMR
+ Functionality implied by ID_AA64PFR2_EL1.FMR == 0b0001.
+
+HWCAP2_LUT
+ Functionality implied by ID_AA64ISAR2_EL1.LUT == 0b0001.
+
+HWCAP2_FAMINMAX
+ Functionality implied by ID_AA64ISAR3_EL1.FAMINMAX == 0b0001.
+
+HWCAP2_F8CVT
+ Functionality implied by ID_AA64FPFR0_EL1.F8CVT == 0b1.
+
+HWCAP2_F8FMA
+ Functionality implied by ID_AA64FPFR0_EL1.F8FMA == 0b1.
+
+HWCAP2_F8DP4
+ Functionality implied by ID_AA64FPFR0_EL1.F8DP4 == 0b1.
+
+HWCAP2_F8DP2
+ Functionality implied by ID_AA64FPFR0_EL1.F8DP2 == 0b1.
+
+HWCAP2_F8E4M3
+ Functionality implied by ID_AA64FPFR0_EL1.F8E4M3 == 0b1.
+
+HWCAP2_F8E5M2
+ Functionality implied by ID_AA64FPFR0_EL1.F8E5M2 == 0b1.
+
+HWCAP2_SME_LUTV2
+ Functionality implied by ID_AA64SMFR0_EL1.LUTv2 == 0b1.
+
+HWCAP2_SME_F8F16
+ Functionality implied by ID_AA64SMFR0_EL1.F8F16 == 0b1.
+
+HWCAP2_SME_F8F32
+ Functionality implied by ID_AA64SMFR0_EL1.F8F32 == 0b1.
+
+HWCAP2_SME_SF8FMA
+ Functionality implied by ID_AA64SMFR0_EL1.SF8FMA == 0b1.
+
+HWCAP2_SME_SF8DP4
+ Functionality implied by ID_AA64SMFR0_EL1.SF8DP4 == 0b1.
+
+HWCAP2_SME_SF8DP2
+ Functionality implied by ID_AA64SMFR0_EL1.SF8DP2 == 0b1.
+
+HWCAP2_SME_SF8DP4
+ Functionality implied by ID_AA64SMFR0_EL1.SF8DP4 == 0b1.
+
+HWCAP2_POE
+ Functionality implied by ID_AA64MMFR3_EL1.S1POE == 0b0001.
+
+HWCAP3_MTE_FAR
+ Functionality implied by ID_AA64PFR2_EL1.MTEFAR == 0b0001.
+
+HWCAP3_MTE_STORE_ONLY
+ Functionality implied by ID_AA64PFR2_EL1.MTESTOREONLY == 0b0001.
+
+HWCAP3_LSFE
+ Functionality implied by ID_AA64ISAR3_EL1.LSFE == 0b0001
+
+
4. Unused AT_HWCAP bits
-----------------------
diff --git a/Documentation/arch/arm64/gcs.rst b/Documentation/arch/arm64/gcs.rst
new file mode 100644
index 000000000000..226c0b008456
--- /dev/null
+++ b/Documentation/arch/arm64/gcs.rst
@@ -0,0 +1,227 @@
+===============================================
+Guarded Control Stack support for AArch64 Linux
+===============================================
+
+This document outlines briefly the interface provided to userspace by Linux in
+order to support use of the ARM Guarded Control Stack (GCS) feature.
+
+This is an outline of the most important features and issues only and not
+intended to be exhaustive.
+
+
+
+1. General
+-----------
+
+* GCS is an architecture feature intended to provide greater protection
+ against return oriented programming (ROP) attacks and to simplify the
+ implementation of features that need to collect stack traces such as
+ profiling.
+
+* When GCS is enabled a separate guarded control stack is maintained by the
+ PE which is writeable only through specific GCS operations. This
+ stores the call stack only, when a procedure call instruction is
+ performed the current PC is pushed onto the GCS and on RET the
+ address in the LR is verified against that on the top of the GCS.
+
+* When active the current GCS pointer is stored in the system register
+ GCSPR_EL0. This is readable by userspace but can only be updated
+ via specific GCS instructions.
+
+* The architecture provides instructions for switching between guarded
+ control stacks with checks to ensure that the new stack is a valid
+ target for switching.
+
+* The functionality of GCS is similar to that provided by the x86 Shadow
+ Stack feature, due to sharing of userspace interfaces the ABI refers to
+ shadow stacks rather than GCS.
+
+* Support for GCS is reported to userspace via HWCAP_GCS in the aux vector
+ AT_HWCAP entry.
+
+* GCS is enabled per thread. While there is support for disabling GCS
+ at runtime this should be done with great care.
+
+* GCS memory access faults are reported as normal memory access faults.
+
+* GCS specific errors (those reported with EC 0x2d) will be reported as
+ SIGSEGV with a si_code of SEGV_CPERR (control protection error).
+
+* GCS is supported only for AArch64.
+
+* On systems where GCS is supported GCSPR_EL0 is always readable by EL0
+ regardless of the GCS configuration for the thread.
+
+* The architecture supports enabling GCS without verifying that return values
+ in LR match those in the GCS, the LR will be ignored. This is not supported
+ by Linux.
+
+
+
+2. Enabling and disabling Guarded Control Stacks
+-------------------------------------------------
+
+* GCS is enabled and disabled for a thread via the PR_SET_SHADOW_STACK_STATUS
+ prctl(), this takes a single flags argument specifying which GCS features
+ should be used.
+
+* When set PR_SHADOW_STACK_ENABLE flag allocates a Guarded Control Stack
+ and enables GCS for the thread, enabling the functionality controlled by
+ GCSCRE0_EL1.{nTR, RVCHKEN, PCRSEL}.
+
+* When set the PR_SHADOW_STACK_PUSH flag enables the functionality controlled
+ by GCSCRE0_EL1.PUSHMEn, allowing explicit GCS pushes.
+
+* When set the PR_SHADOW_STACK_WRITE flag enables the functionality controlled
+ by GCSCRE0_EL1.STREn, allowing explicit stores to the Guarded Control Stack.
+
+* Any unknown flags will cause PR_SET_SHADOW_STACK_STATUS to return -EINVAL.
+
+* PR_LOCK_SHADOW_STACK_STATUS is passed a bitmask of features with the same
+ values as used for PR_SET_SHADOW_STACK_STATUS. Any future changes to the
+ status of the specified GCS mode bits will be rejected.
+
+* PR_LOCK_SHADOW_STACK_STATUS allows any bit to be locked, this allows
+ userspace to prevent changes to any future features.
+
+* There is no support for a process to remove a lock that has been set for
+ it.
+
+* PR_SET_SHADOW_STACK_STATUS and PR_LOCK_SHADOW_STACK_STATUS affect only the
+ thread that called them, any other running threads will be unaffected.
+
+* New threads inherit the GCS configuration of the thread that created them.
+
+* GCS is disabled on exec().
+
+* The current GCS configuration for a thread may be read with the
+ PR_GET_SHADOW_STACK_STATUS prctl(), this returns the same flags that
+ are passed to PR_SET_SHADOW_STACK_STATUS.
+
+* If GCS is disabled for a thread after having previously been enabled then
+ the stack will remain allocated for the lifetime of the thread. At present
+ any attempt to reenable GCS for the thread will be rejected, this may be
+ revisited in future.
+
+* It should be noted that since enabling GCS will result in GCS becoming
+ active immediately it is not normally possible to return from the function
+ that invoked the prctl() that enabled GCS. It is expected that the normal
+ usage will be that GCS is enabled very early in execution of a program.
+
+
+
+3. Allocation of Guarded Control Stacks
+----------------------------------------
+
+* When GCS is enabled for a thread a new Guarded Control Stack will be
+ allocated for it of half the standard stack size or 2 gigabytes,
+ whichever is smaller.
+
+* When a new thread is created by a thread which has GCS enabled then a
+ new Guarded Control Stack will be allocated for the new thread with
+ half the size of the standard stack.
+
+* When a stack is allocated by enabling GCS or during thread creation then
+ the top 8 bytes of the stack will be initialised to 0 and GCSPR_EL0 will
+ be set to point to the address of this 0 value, this can be used to
+ detect the top of the stack.
+
+* Additional Guarded Control Stacks can be allocated using the
+ map_shadow_stack() system call.
+
+* Stacks allocated using map_shadow_stack() can optionally have an end of
+ stack marker and cap placed at the top of the stack. If the flag
+ SHADOW_STACK_SET_TOKEN is specified a cap will be placed on the stack,
+ if SHADOW_STACK_SET_MARKER is not specified the cap will be the top 8
+ bytes of the stack and if it is specified then the cap will be the next
+ 8 bytes. While specifying just SHADOW_STACK_SET_MARKER by itself is
+ valid since the marker is all bits 0 it has no observable effect.
+
+* Stacks allocated using map_shadow_stack() must have a size which is a
+ multiple of 8 bytes larger than 8 bytes and must be 8 bytes aligned.
+
+* An address can be specified to map_shadow_stack(), if one is provided then
+ it must be aligned to a page boundary.
+
+* When a thread is freed the Guarded Control Stack initially allocated for
+ that thread will be freed. Note carefully that if the stack has been
+ switched this may not be the stack currently in use by the thread.
+
+
+4. Signal handling
+--------------------
+
+* A new signal frame record gcs_context encodes the current GCS mode and
+ pointer for the interrupted context on signal delivery. This will always
+ be present on systems that support GCS.
+
+* The record contains a flag field which reports the current GCS configuration
+ for the interrupted context as PR_GET_SHADOW_STACK_STATUS would.
+
+* The signal handler is run with the same GCS configuration as the interrupted
+ context.
+
+* When GCS is enabled for the interrupted thread a signal handling specific
+ GCS cap token will be written to the GCS, this is an architectural GCS cap
+ with the token type (bits 0..11) all clear. The GCSPR_EL0 reported in the
+ signal frame will point to this cap token.
+
+* The signal handler will use the same GCS as the interrupted context.
+
+* When GCS is enabled on signal entry a frame with the address of the signal
+ return handler will be pushed onto the GCS, allowing return from the signal
+ handler via RET as normal. This will not be reported in the gcs_context in
+ the signal frame.
+
+
+5. Signal return
+-----------------
+
+When returning from a signal handler:
+
+* If there is a gcs_context record in the signal frame then the GCS flags
+ and GCSPR_EL0 will be restored from that context prior to further
+ validation.
+
+* If there is no gcs_context record in the signal frame then the GCS
+ configuration will be unchanged.
+
+* If GCS is enabled on return from a signal handler then GCSPR_EL0 must
+ point to a valid GCS signal cap record, this will be popped from the
+ GCS prior to signal return.
+
+* If the GCS configuration is locked when returning from a signal then any
+ attempt to change the GCS configuration will be treated as an error. This
+ is true even if GCS was not enabled prior to signal entry.
+
+* GCS may be disabled via signal return but any attempt to enable GCS via
+ signal return will be rejected.
+
+
+6. ptrace extensions
+---------------------
+
+* A new regset NT_ARM_GCS is defined for use with PTRACE_GETREGSET and
+ PTRACE_SETREGSET.
+
+* The GCS mode, including enable and disable, may be configured via ptrace.
+ If GCS is enabled via ptrace no new GCS will be allocated for the thread.
+
+* Configuration via ptrace ignores locking of GCS mode bits.
+
+
+7. ELF coredump extensions
+---------------------------
+
+* NT_ARM_GCS notes will be added to each coredump for each thread of the
+ dumped process. The contents will be equivalent to the data that would
+ have been read if a PTRACE_GETREGSET of the corresponding type were
+ executed for each thread when the coredump was generated.
+
+
+
+8. /proc extensions
+--------------------
+
+* Guarded Control Stack pages will include "ss" in their VmFlags in
+ /proc/<pid>/smaps.
diff --git a/Documentation/arch/arm64/index.rst b/Documentation/arch/arm64/index.rst
index d08e924204bf..6a012c98bdcd 100644
--- a/Documentation/arch/arm64/index.rst
+++ b/Documentation/arch/arm64/index.rst
@@ -10,15 +10,19 @@ ARM64 Architecture
acpi_object_usage
amu
arm-acpi
+ arm-cca
asymmetric-32bit
booting
cpu-feature-registers
+ cpu-hotplug
elf_hwcaps
+ gcs
hugetlbpage
kdump
legacy_instructions
memory
memory-tagging-extension
+ mops
perf
pointer-authentication
ptdump
diff --git a/Documentation/arch/arm64/memory.rst b/Documentation/arch/arm64/memory.rst
index 55a55f30eed8..678fbb418c3a 100644
--- a/Documentation/arch/arm64/memory.rst
+++ b/Documentation/arch/arm64/memory.rst
@@ -18,77 +18,10 @@ ARMv8.2 adds optional support for Large Virtual Address space. This is
only available when running with a 64KB page size and expands the
number of descriptors in the first level of translation.
-User addresses have bits 63:48 set to 0 while the kernel addresses have
-the same bits set to 1. TTBRx selection is given by bit 63 of the
-virtual address. The swapper_pg_dir contains only kernel (global)
-mappings while the user pgd contains only user (non-global) mappings.
-The swapper_pg_dir address is written to TTBR1 and never written to
-TTBR0.
-
-
-AArch64 Linux memory layout with 4KB pages + 4 levels (48-bit)::
-
- Start End Size Use
- -----------------------------------------------------------------------
- 0000000000000000 0000ffffffffffff 256TB user
- ffff000000000000 ffff7fffffffffff 128TB kernel logical memory map
- [ffff600000000000 ffff7fffffffffff] 32TB [kasan shadow region]
- ffff800000000000 ffff80007fffffff 2GB modules
- ffff800080000000 fffffbffefffffff 124TB vmalloc
- fffffbfff0000000 fffffbfffdffffff 224MB fixed mappings (top down)
- fffffbfffe000000 fffffbfffe7fffff 8MB [guard region]
- fffffbfffe800000 fffffbffff7fffff 16MB PCI I/O space
- fffffbffff800000 fffffbffffffffff 8MB [guard region]
- fffffc0000000000 fffffdffffffffff 2TB vmemmap
- fffffe0000000000 ffffffffffffffff 2TB [guard region]
-
-
-AArch64 Linux memory layout with 64KB pages + 3 levels (52-bit with HW support)::
-
- Start End Size Use
- -----------------------------------------------------------------------
- 0000000000000000 000fffffffffffff 4PB user
- fff0000000000000 ffff7fffffffffff ~4PB kernel logical memory map
- [fffd800000000000 ffff7fffffffffff] 512TB [kasan shadow region]
- ffff800000000000 ffff80007fffffff 2GB modules
- ffff800080000000 fffffbffefffffff 124TB vmalloc
- fffffbfff0000000 fffffbfffdffffff 224MB fixed mappings (top down)
- fffffbfffe000000 fffffbfffe7fffff 8MB [guard region]
- fffffbfffe800000 fffffbffff7fffff 16MB PCI I/O space
- fffffbffff800000 fffffbffffffffff 8MB [guard region]
- fffffc0000000000 ffffffdfffffffff ~4TB vmemmap
- ffffffe000000000 ffffffffffffffff 128GB [guard region]
-
-
-Translation table lookup with 4KB pages::
-
- +--------+--------+--------+--------+--------+--------+--------+--------+
- |63 56|55 48|47 40|39 32|31 24|23 16|15 8|7 0|
- +--------+--------+--------+--------+--------+--------+--------+--------+
- | | | | | |
- | | | | | v
- | | | | | [11:0] in-page offset
- | | | | +-> [20:12] L3 index
- | | | +-----------> [29:21] L2 index
- | | +---------------------> [38:30] L1 index
- | +-------------------------------> [47:39] L0 index
- +-------------------------------------------------> [63] TTBR0/1
-
-
-Translation table lookup with 64KB pages::
-
- +--------+--------+--------+--------+--------+--------+--------+--------+
- |63 56|55 48|47 40|39 32|31 24|23 16|15 8|7 0|
- +--------+--------+--------+--------+--------+--------+--------+--------+
- | | | | |
- | | | | v
- | | | | [15:0] in-page offset
- | | | +----------> [28:16] L3 index
- | | +--------------------------> [41:29] L2 index
- | +-------------------------------> [47:42] L1 index (48-bit)
- | [51:42] L1 index (52-bit)
- +-------------------------------------------------> [63] TTBR0/1
-
+TTBRx selection is given by bit 55 of the virtual address. The
+swapper_pg_dir contains only kernel (global) mappings while the user pgd
+contains only user (non-global) mappings. The swapper_pg_dir address is
+written to TTBR1 and never written to TTBR0.
When using KVM without the Virtualization Host Extensions, the
hypervisor maps kernel pages in EL2 at a fixed (and potentially
diff --git a/Documentation/arch/arm64/mops.rst b/Documentation/arch/arm64/mops.rst
new file mode 100644
index 000000000000..2ef5b147f8dc
--- /dev/null
+++ b/Documentation/arch/arm64/mops.rst
@@ -0,0 +1,44 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===================================
+Memory copy/set instructions (MOPS)
+===================================
+
+A MOPS memory copy/set operation consists of three consecutive CPY* or SET*
+instructions: a prologue, main and epilogue (for example: CPYP, CPYM, CPYE).
+
+A main or epilogue instruction can take a MOPS exception for various reasons,
+for example when a task is migrated to a CPU with a different MOPS
+implementation, or when the instruction's alignment and size requirements are
+not met. The software exception handler is then expected to reset the registers
+and restart execution from the prologue instruction. Normally this is handled
+by the kernel.
+
+For more details refer to "D1.3.5.7 Memory Copy and Memory Set exceptions" in
+the Arm Architecture Reference Manual DDI 0487K.a (Arm ARM).
+
+.. _arm64_mops_hyp:
+
+Hypervisor requirements
+-----------------------
+
+A hypervisor running a Linux guest must handle all MOPS exceptions from the
+guest kernel, as Linux may not be able to handle the exception at all times.
+For example, a MOPS exception can be taken when the hypervisor migrates a vCPU
+to another physical CPU with a different MOPS implementation.
+
+To do this, the hypervisor must:
+
+ - Set HCRX_EL2.MCE2 to 1 so that the exception is taken to the hypervisor.
+
+ - Have an exception handler that implements the algorithm from the Arm ARM
+ rules CNTMJ and MWFQH.
+
+ - Set the guest's PSTATE.SS to 0 in the exception handler, to handle a
+ potential step of the current instruction.
+
+ Note: Clearing PSTATE.SS is needed so that a single step exception is taken
+ on the next instruction (the prologue instruction). Otherwise prologue
+ would get silently stepped over and the single step exception taken on the
+ main instruction. Note that if the guest instruction is not being stepped
+ then clearing PSTATE.SS has no effect.
diff --git a/Documentation/arch/arm64/ptdump.rst b/Documentation/arch/arm64/ptdump.rst
index 5dcfc5d7cddf..51eb902ba41a 100644
--- a/Documentation/arch/arm64/ptdump.rst
+++ b/Documentation/arch/arm64/ptdump.rst
@@ -22,8 +22,6 @@ offlining of memory being accessed by the ptdump code.
In order to dump the kernel page tables, enable the following
configurations and mount debugfs::
- CONFIG_GENERIC_PTDUMP=y
- CONFIG_PTDUMP_CORE=y
CONFIG_PTDUMP_DEBUGFS=y
mount -t debugfs nodev /sys/kernel/debug
diff --git a/Documentation/arch/arm64/silicon-errata.rst b/Documentation/arch/arm64/silicon-errata.rst
index e8c2ce1f9df6..a7ec57060f64 100644
--- a/Documentation/arch/arm64/silicon-errata.rst
+++ b/Documentation/arch/arm64/silicon-errata.rst
@@ -35,8 +35,9 @@ can be triggered by Linux).
For software workarounds that may adversely impact systems unaffected by
the erratum in question, a Kconfig entry is added under "Kernel
Features" -> "ARM errata workarounds via the alternatives framework".
-These are enabled by default and patched in at runtime when an affected
-CPU is detected. For less-intrusive workarounds, a Kconfig option is not
+With the exception of workarounds for errata deemed "rare" by Arm, these
+are enabled by default and patched in at runtime when an affected CPU is
+detected. For less-intrusive workarounds, a Kconfig option is not
available and the code is structured (preferably with a comment) in such
a way that the erratum will not be hit.
@@ -54,6 +55,10 @@ stable kernels.
+----------------+-----------------+-----------------+-----------------------------+
| Ampere | AmpereOne | AC03_CPU_38 | AMPERE_ERRATUM_AC03_CPU_38 |
+----------------+-----------------+-----------------+-----------------------------+
+| Ampere | AmpereOne AC04 | AC04_CPU_10 | AMPERE_ERRATUM_AC03_CPU_38 |
++----------------+-----------------+-----------------+-----------------------------+
+| Ampere | AmpereOne AC04 | AC04_CPU_23 | AMPERE_ERRATUM_AC04_CPU_23 |
++----------------+-----------------+-----------------+-----------------------------+
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A510 | #2457168 | ARM64_ERRATUM_2457168 |
+----------------+-----------------+-----------------+-----------------------------+
@@ -121,24 +126,52 @@ stable kernels.
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A76 | #1490853 | N/A |
+----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A76 | #3324349 | ARM64_ERRATUM_3194386 |
++----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A77 | #1491015 | N/A |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A77 | #1508412 | ARM64_ERRATUM_1508412 |
+----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A77 | #3324348 | ARM64_ERRATUM_3194386 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A78 | #3324344 | ARM64_ERRATUM_3194386 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A78C | #3324346,3324347| ARM64_ERRATUM_3194386 |
++----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A710 | #2119858 | ARM64_ERRATUM_2119858 |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A710 | #2054223 | ARM64_ERRATUM_2054223 |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A710 | #2224489 | ARM64_ERRATUM_2224489 |
+----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A710 | #3324338 | ARM64_ERRATUM_3194386 |
++----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A715 | #2645198 | ARM64_ERRATUM_2645198 |
+----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A715 | #3456084 | ARM64_ERRATUM_3194386 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A720 | #3456091 | ARM64_ERRATUM_3194386 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A725 | #3456106 | ARM64_ERRATUM_3194386 |
++----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-X1 | #1502854 | N/A |
+----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-X1 | #3324344 | ARM64_ERRATUM_3194386 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-X1C | #3324346 | ARM64_ERRATUM_3194386 |
++----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-X2 | #2119858 | ARM64_ERRATUM_2119858 |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-X2 | #2224489 | ARM64_ERRATUM_2224489 |
+----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-X2 | #3324338 | ARM64_ERRATUM_3194386 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-X3 | #3324335 | ARM64_ERRATUM_3194386 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-X4 | #3194386 | ARM64_ERRATUM_3194386 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-X925 | #3324334 | ARM64_ERRATUM_3194386 |
++----------------+-----------------+-----------------+-----------------------------+
| ARM | Neoverse-N1 | #1188873,1418040| ARM64_ERRATUM_1418040 |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Neoverse-N1 | #1349291 | N/A |
@@ -147,15 +180,30 @@ stable kernels.
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Neoverse-N1 | #1542419 | ARM64_ERRATUM_1542419 |
+----------------+-----------------+-----------------+-----------------------------+
+| ARM | Neoverse-N1 | #3324349 | ARM64_ERRATUM_3194386 |
++----------------+-----------------+-----------------+-----------------------------+
| ARM | Neoverse-N2 | #2139208 | ARM64_ERRATUM_2139208 |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Neoverse-N2 | #2067961 | ARM64_ERRATUM_2067961 |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Neoverse-N2 | #2253138 | ARM64_ERRATUM_2253138 |
+----------------+-----------------+-----------------+-----------------------------+
+| ARM | Neoverse-N2 | #3324339 | ARM64_ERRATUM_3194386 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Neoverse-N3 | #3456111 | ARM64_ERRATUM_3194386 |
++----------------+-----------------+-----------------+-----------------------------+
| ARM | Neoverse-V1 | #1619801 | N/A |
+----------------+-----------------+-----------------+-----------------------------+
-| ARM | MMU-500 | #841119,826419 | N/A |
+| ARM | Neoverse-V1 | #3324341 | ARM64_ERRATUM_3194386 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Neoverse-V2 | #3324336 | ARM64_ERRATUM_3194386 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Neoverse-V3 | #3312417 | ARM64_ERRATUM_3194386 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | Neoverse-V3AE | #3312417 | ARM64_ERRATUM_3194386 |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | MMU-500 | #841119,826419 | ARM_SMMU_MMU_500_CPRE_ERRATA|
+| | | #562869,1047329 | |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | MMU-600 | #1076982,1209401| N/A |
+----------------+-----------------+-----------------+-----------------------------+
@@ -212,8 +260,11 @@ stable kernels.
+----------------+-----------------+-----------------+-----------------------------+
| Hisilicon | Hip08 SMMU PMCG | #162001800 | N/A |
+----------------+-----------------+-----------------+-----------------------------+
-| Hisilicon | Hip08 SMMU PMCG | #162001900 | N/A |
-| | Hip09 SMMU PMCG | | |
+| Hisilicon | Hip{08,09,09A,10| #162001900 | N/A |
+| | ,10C,11} | | |
+| | SMMU PMCG | | |
++----------------+-----------------+-----------------+-----------------------------+
+| Hisilicon | Hip09 | #162100801 | HISILICON_ERRATUM_162100801 |
+----------------+-----------------+-----------------+-----------------------------+
+----------------+-----------------+-----------------+-----------------------------+
| Qualcomm Tech. | Kryo/Falkor v1 | E1003 | QCOM_FALKOR_ERRATUM_1003 |
@@ -237,9 +288,20 @@ stable kernels.
+----------------+-----------------+-----------------+-----------------------------+
| Rockchip | RK3588 | #3588001 | ROCKCHIP_ERRATUM_3588001 |
+----------------+-----------------+-----------------+-----------------------------+
+| Rockchip | RK3568 | #3568002 | ROCKCHIP_ERRATUM_3568002 |
++----------------+-----------------+-----------------+-----------------------------+
+----------------+-----------------+-----------------+-----------------------------+
| Fujitsu | A64FX | E#010001 | FUJITSU_ERRATUM_010001 |
+----------------+-----------------+-----------------+-----------------------------+
+----------------+-----------------+-----------------+-----------------------------+
| ASR | ASR8601 | #8601001 | N/A |
+----------------+-----------------+-----------------+-----------------------------+
++----------------+-----------------+-----------------+-----------------------------+
+| Microsoft | Azure Cobalt 100| #2139208 | ARM64_ERRATUM_2139208 |
++----------------+-----------------+-----------------+-----------------------------+
+| Microsoft | Azure Cobalt 100| #2067961 | ARM64_ERRATUM_2067961 |
++----------------+-----------------+-----------------+-----------------------------+
+| Microsoft | Azure Cobalt 100| #2253138 | ARM64_ERRATUM_2253138 |
++----------------+-----------------+-----------------+-----------------------------+
+| Microsoft | Azure Cobalt 100| #3324339 | ARM64_ERRATUM_3194386 |
++----------------+-----------------+-----------------+-----------------------------+
diff --git a/Documentation/arch/arm64/sme.rst b/Documentation/arch/arm64/sme.rst
index 3d0e53ecac4f..583f2ee9cb97 100644
--- a/Documentation/arch/arm64/sme.rst
+++ b/Documentation/arch/arm64/sme.rst
@@ -69,29 +69,19 @@ model features for SME is included in Appendix A.
vectors from 0 to VL/8-1 stored in the same endianness invariant format as is
used for SVE vectors.
-* On thread creation TPIDR2_EL0 is preserved unless CLONE_SETTLS is specified,
- in which case it is set to 0.
+* On thread creation PSTATE.ZA and TPIDR2_EL0 are preserved unless CLONE_VM
+ is specified, in which case PSTATE.ZA is set to 0 and TPIDR2_EL0 is set to 0.
2. Vector lengths
------------------
-SME defines a second vector length similar to the SVE vector length which is
+SME defines a second vector length similar to the SVE vector length which
controls the size of the streaming mode SVE vectors and the ZA matrix array.
The ZA matrix is square with each side having as many bytes as a streaming
mode SVE vector.
-3. Sharing of streaming and non-streaming mode SVE state
----------------------------------------------------------
-
-It is implementation defined which if any parts of the SVE state are shared
-between streaming and non-streaming modes. When switching between modes
-via software interfaces such as ptrace if no register content is provided as
-part of switching no state will be assumed to be shared and everything will
-be zeroed.
-
-
-4. System call behaviour
+3. System call behaviour
-------------------------
* On syscall PSTATE.ZA is preserved, if PSTATE.ZA==1 then the contents of the
@@ -112,10 +102,10 @@ be zeroed.
exceptions for execve() described in section 6.
-5. Signal handling
+4. Signal handling
-------------------
-* Signal handlers are invoked with streaming mode and ZA disabled.
+* Signal handlers are invoked with PSTATE.SM=0, PSTATE.ZA=0, and TPIDR2_EL0=0.
* A new signal frame record TPIDR2_MAGIC is added formatted as a struct
tpidr2_context to allow access to TPIDR2_EL0 from signal handlers.
@@ -238,12 +228,12 @@ prctl(PR_SME_SET_VL, unsigned long arg)
bits of Z0..Z31 except for Z0 bits [127:0] .. Z31 bits [127:0] to become
unspecified, including both streaming and non-streaming SVE state.
Calling PR_SME_SET_VL with vl equal to the thread's current vector
- length, or calling PR_SME_SET_VL with the PR_SVE_SET_VL_ONEXEC flag,
+ length, or calling PR_SME_SET_VL with the PR_SME_SET_VL_ONEXEC flag,
does not constitute a change to the vector length for this purpose.
- * Changing the vector length causes PSTATE.ZA and PSTATE.SM to be cleared.
+ * Changing the vector length causes PSTATE.ZA to be cleared.
Calling PR_SME_SET_VL with vl equal to the thread's current vector
- length, or calling PR_SME_SET_VL with the PR_SVE_SET_VL_ONEXEC flag,
+ length, or calling PR_SME_SET_VL with the PR_SME_SET_VL_ONEXEC flag,
does not constitute a change to the vector length for this purpose.
@@ -346,6 +336,10 @@ The regset data starts with struct user_za_header, containing:
* Writes to NT_ARM_ZT will set PSTATE.ZA to 1.
+* If any register data is provided along with SME_PT_VL_ONEXEC then the
+ registers data will be interpreted with the current vector length, not
+ the vector length configured for use on exec.
+
8. ELF coredump extensions
---------------------------
@@ -379,9 +373,8 @@ The regset data starts with struct user_za_header, containing:
/proc/sys/abi/sme_default_vector_length
Writing the text representation of an integer to this file sets the system
- default vector length to the specified value, unless the value is greater
- than the maximum vector length supported by the system in which case the
- default vector length is set to that maximum.
+ default vector length to the specified value rounded to a supported value
+ using the same rules as for setting vector length via PR_SME_SET_VL.
The result can be determined by reopening the file and reading its
contents.
diff --git a/Documentation/arch/arm64/sve.rst b/Documentation/arch/arm64/sve.rst
index 0d9a426e9f85..a61c9d0efe4d 100644
--- a/Documentation/arch/arm64/sve.rst
+++ b/Documentation/arch/arm64/sve.rst
@@ -117,11 +117,6 @@ the SVE instruction set architecture.
* The SVE registers are not used to pass arguments to or receive results from
any syscall.
-* In practice the affected registers/bits will be preserved or will be replaced
- with zeros on return from a syscall, but userspace should not make
- assumptions about this. The kernel behaviour may vary on a case-by-case
- basis.
-
* All other SVE state of a thread, including the currently configured vector
length, the state of the PR_SVE_VL_INHERIT flag, and the deferred vector
length (if any), is preserved across all syscalls, subject to the specific
@@ -407,6 +402,15 @@ The regset data starts with struct user_sve_header, containing:
streaming mode and any SETREGSET of NT_ARM_SSVE will enter streaming mode
if the target was not in streaming mode.
+* On systems that do not support SVE it is permitted to use SETREGSET to
+ write SVE_PT_REGS_FPSIMD formatted data via NT_ARM_SVE, in this case the
+ vector length should be specified as 0. This allows streaming mode to be
+ disabled on systems with SME but not SVE.
+
+* If any register data is provided along with SVE_PT_VL_ONEXEC then the
+ registers data will be interpreted with the current vector length, not
+ the vector length configured for use on exec.
+
* The effect of writing a partial, incomplete payload is unspecified.
@@ -428,9 +432,8 @@ The regset data starts with struct user_sve_header, containing:
/proc/sys/abi/sve_default_vector_length
Writing the text representation of an integer to this file sets the system
- default vector length to the specified value, unless the value is greater
- than the maximum vector length supported by the system in which case the
- default vector length is set to that maximum.
+ default vector length to the specified value rounded to a supported value
+ using the same rules as for setting vector length via PR_SVE_SET_VL.
The result can be determined by reopening the file and reading its
contents.
diff --git a/Documentation/arch/arm64/tagged-pointers.rst b/Documentation/arch/arm64/tagged-pointers.rst
index 81b6c2a770dd..f87a925ca9a5 100644
--- a/Documentation/arch/arm64/tagged-pointers.rst
+++ b/Documentation/arch/arm64/tagged-pointers.rst
@@ -60,11 +60,12 @@ that signal handlers in applications making use of tags cannot rely
on the tag information for user virtual addresses being maintained
in these fields unless the flag was set.
-Due to architecture limitations, bits 63:60 of the fault address
-are not preserved in response to synchronous tag check faults
-(SEGV_MTESERR) even if SA_EXPOSE_TAGBITS was set. Applications should
-treat the values of these bits as undefined in order to accommodate
-future architecture revisions which may preserve the bits.
+If FEAT_MTE_TAGGED_FAR (Armv8.9) is supported, bits 63:60 of the fault address
+are preserved in response to synchronous tag check faults (SEGV_MTESERR)
+otherwise not preserved even if SA_EXPOSE_TAGBITS was set.
+Applications should interpret the values of these bits based on
+the support for the HWCAP3_MTE_FAR. If the support is not present,
+the values of these bits should be considered as undefined otherwise valid.
For signals raised in response to watchpoint debug exceptions, the
tag information will be preserved regardless of the SA_EXPOSE_TAGBITS
diff --git a/Documentation/arch/loongarch/irq-chip-model.rst b/Documentation/arch/loongarch/irq-chip-model.rst
index 7988f4192363..8f5c3345109e 100644
--- a/Documentation/arch/loongarch/irq-chip-model.rst
+++ b/Documentation/arch/loongarch/irq-chip-model.rst
@@ -85,6 +85,102 @@ to CPUINTC directly::
| Devices |
+---------+
+Virtual Extended IRQ model
+==========================
+
+In this model, IPI (Inter-Processor Interrupt) and CPU Local Timer interrupt
+go to CPUINTC directly, CPU UARTS interrupts go to PCH-PIC, while all other
+devices interrupts go to PCH-PIC/PCH-MSI and gathered by V-EIOINTC (Virtual
+Extended I/O Interrupt Controller), and then go to CPUINTC directly::
+
+ +-----+ +-------------------+ +-------+
+ | IPI |--> | CPUINTC(0-255vcpu)| <-- | Timer |
+ +-----+ +-------------------+ +-------+
+ ^
+ |
+ +-----------+
+ | V-EIOINTC |
+ +-----------+
+ ^ ^
+ | |
+ +---------+ +---------+
+ | PCH-PIC | | PCH-MSI |
+ +---------+ +---------+
+ ^ ^ ^
+ | | |
+ +--------+ +---------+ +---------+
+ | UARTs | | Devices | | Devices |
+ +--------+ +---------+ +---------+
+
+
+Description
+-----------
+V-EIOINTC (Virtual Extended I/O Interrupt Controller) is an extension of
+EIOINTC, it only works in VM mode which runs in KVM hypervisor. Interrupts can
+be routed to up to four vCPUs via standard EIOINTC, however with V-EIOINTC
+interrupts can be routed to up to 256 virtual cpus.
+
+With standard EIOINTC, interrupt routing setting includes two parts: eight
+bits for CPU selection and four bits for CPU IP (Interrupt Pin) selection.
+For CPU selection there is four bits for EIOINTC node selection, four bits
+for EIOINTC CPU selection. Bitmap method is used for CPU selection and
+CPU IP selection, so interrupt can only route to CPU0 - CPU3 and IP0-IP3 in
+one EIOINTC node.
+
+With V-EIOINTC it supports to route more CPUs and CPU IP (Interrupt Pin),
+there are two newly added registers with V-EIOINTC.
+
+EXTIOI_VIRT_FEATURES
+--------------------
+This register is read-only register, which indicates supported features with
+V-EIOINTC. Feature EXTIOI_HAS_INT_ENCODE and EXTIOI_HAS_CPU_ENCODE is added.
+
+Feature EXTIOI_HAS_INT_ENCODE is part of standard EIOINTC. If it is 1, it
+indicates that CPU Interrupt Pin selection can be normal method rather than
+bitmap method, so interrupt can be routed to IP0 - IP15.
+
+Feature EXTIOI_HAS_CPU_ENCODE is extension of V-EIOINTC. If it is 1, it
+indicates that CPU selection can be normal method rather than bitmap method,
+so interrupt can be routed to CPU0 - CPU255.
+
+EXTIOI_VIRT_CONFIG
+------------------
+This register is read-write register, for compatibility interrupt routed uses
+the default method which is the same with standard EIOINTC. If the bit is set
+with 1, it indicated HW to use normal method rather than bitmap method.
+
+Advanced Extended IRQ model
+===========================
+
+In this model, IPI (Inter-Processor Interrupt) and CPU Local Timer interrupt go
+to CPUINTC directly, CPU UARTS interrupts go to LIOINTC, PCH-MSI interrupts go
+to AVECINTC, and then go to CPUINTC directly, while all other devices interrupts
+go to PCH-PIC/PCH-LPC and gathered by EIOINTC, and then go to CPUINTC directly::
+
+ +-----+ +-----------------------+ +-------+
+ | IPI | --> | CPUINTC | <-- | Timer |
+ +-----+ +-----------------------+ +-------+
+ ^ ^ ^
+ | | |
+ +---------+ +----------+ +---------+ +-------+
+ | EIOINTC | | AVECINTC | | LIOINTC | <-- | UARTs |
+ +---------+ +----------+ +---------+ +-------+
+ ^ ^
+ | |
+ +---------+ +---------+
+ | PCH-PIC | | PCH-MSI |
+ +---------+ +---------+
+ ^ ^ ^
+ | | |
+ +---------+ +---------+ +---------+
+ | Devices | | PCH-LPC | | Devices |
+ +---------+ +---------+ +---------+
+ ^
+ |
+ +---------+
+ | Devices |
+ +---------+
+
ACPI-related definitions
========================
diff --git a/Documentation/arch/m68k/buddha-driver.rst b/Documentation/arch/m68k/buddha-driver.rst
index 20e401413991..5d1bc824978b 100644
--- a/Documentation/arch/m68k/buddha-driver.rst
+++ b/Documentation/arch/m68k/buddha-driver.rst
@@ -173,7 +173,7 @@ When accessing IDE registers with A6=1 (for example $84x),
the timing will always be mode 0 8-bit compatible, no matter
what you have selected in the speed register:
-781ns select, IOR/IOW after 4 clock cycles (=314ns) aktive.
+781ns select, IOR/IOW after 4 clock cycles (=314ns) active.
All the timings with a very short select-signal (the 355ns
fast accesses) depend on the accelerator card used in the
diff --git a/Documentation/arch/openrisc/openrisc_port.rst b/Documentation/arch/openrisc/openrisc_port.rst
index 1565b9546e38..60b0a9e51d70 100644
--- a/Documentation/arch/openrisc/openrisc_port.rst
+++ b/Documentation/arch/openrisc/openrisc_port.rst
@@ -7,10 +7,10 @@ target architecture, specifically, is the 32-bit OpenRISC 1000 family (or1k).
For information about OpenRISC processors and ongoing development:
- ======= =============================
+ ======= ==============================
website https://openrisc.io
- email openrisc@lists.librecores.org
- ======= =============================
+ email linux-openrisc@vger.kernel.org
+ ======= ==============================
---------------------------------------------------------------------
@@ -27,11 +27,11 @@ Toolchain binaries can be obtained from openrisc.io or our github releases page.
Instructions for building the different toolchains can be found on openrisc.io
or Stafford's toolchain build and release scripts.
- ========== =================================================
- binaries https://github.com/openrisc/or1k-gcc/releases
+ ========== ==========================================================
+ binaries https://github.com/stffrdhrn/or1k-toolchain-build/releases
toolchains https://openrisc.io/software
building https://github.com/stffrdhrn/or1k-toolchain-build
- ========== =================================================
+ ========== ==========================================================
2) Building
@@ -40,6 +40,12 @@ Build the Linux kernel as usual::
make ARCH=openrisc CROSS_COMPILE="or1k-linux-" defconfig
make ARCH=openrisc CROSS_COMPILE="or1k-linux-"
+If you want to embed initramfs in the kernel, also pass ``CONFIG_INITRAMFS_SOURCE``. For example::
+
+ make ARCH=openrisc CROSS_COMPILE="or1k-linux-" CONFIG_INITRAMFS_SOURCE="path/to/rootfs path/to/devnodes"
+
+For more information on this, please check Documentation/filesystems/ramfs-rootfs-initramfs.rst.
+
3) Running on FPGA (optional)
The OpenRISC community typically uses FuseSoC to manage building and programming
diff --git a/Documentation/arch/powerpc/booting.rst b/Documentation/arch/powerpc/booting.rst
index 11aa440f98cc..472e97891aef 100644
--- a/Documentation/arch/powerpc/booting.rst
+++ b/Documentation/arch/powerpc/booting.rst
@@ -93,8 +93,8 @@ given platform based on the content of the device-tree. Thus, you
should:
a) add your platform support as a _boolean_ option in
- arch/powerpc/Kconfig, following the example of PPC_PSERIES,
- PPC_PMAC and PPC_MAPLE. The latter is probably a good
+ arch/powerpc/Kconfig, following the example of PPC_PSERIES
+ and PPC_PMAC. The latter is probably a good
example of a board support to start from.
b) create your main platform file as
diff --git a/Documentation/arch/powerpc/cpu_families.rst b/Documentation/arch/powerpc/cpu_families.rst
index eb7e60649b43..f55433c6b8f3 100644
--- a/Documentation/arch/powerpc/cpu_families.rst
+++ b/Documentation/arch/powerpc/cpu_families.rst
@@ -128,24 +128,6 @@ IBM BookE
- All 32 bit::
+--------------+
- | 401 |
- +--------------+
- |
- |
- v
- +--------------+
- | 403 |
- +--------------+
- |
- |
- v
- +--------------+
- | 405 |
- +--------------+
- |
- |
- v
- +--------------+
| 440 |
+--------------+
|
diff --git a/Documentation/arch/powerpc/cxl.rst b/Documentation/arch/powerpc/cxl.rst
deleted file mode 100644
index d2d77057610e..000000000000
--- a/Documentation/arch/powerpc/cxl.rst
+++ /dev/null
@@ -1,469 +0,0 @@
-====================================
-Coherent Accelerator Interface (CXL)
-====================================
-
-Introduction
-============
-
- The coherent accelerator interface is designed to allow the
- coherent connection of accelerators (FPGAs and other devices) to a
- POWER system. These devices need to adhere to the Coherent
- Accelerator Interface Architecture (CAIA).
-
- IBM refers to this as the Coherent Accelerator Processor Interface
- or CAPI. In the kernel it's referred to by the name CXL to avoid
- confusion with the ISDN CAPI subsystem.
-
- Coherent in this context means that the accelerator and CPUs can
- both access system memory directly and with the same effective
- addresses.
-
-
-Hardware overview
-=================
-
- ::
-
- POWER8/9 FPGA
- +----------+ +---------+
- | | | |
- | CPU | | AFU |
- | | | |
- | | | |
- | | | |
- +----------+ +---------+
- | PHB | | |
- | +------+ | PSL |
- | | CAPP |<------>| |
- +---+------+ PCIE +---------+
-
- The POWER8/9 chip has a Coherently Attached Processor Proxy (CAPP)
- unit which is part of the PCIe Host Bridge (PHB). This is managed
- by Linux by calls into OPAL. Linux doesn't directly program the
- CAPP.
-
- The FPGA (or coherently attached device) consists of two parts.
- The POWER Service Layer (PSL) and the Accelerator Function Unit
- (AFU). The AFU is used to implement specific functionality behind
- the PSL. The PSL, among other things, provides memory address
- translation services to allow each AFU direct access to userspace
- memory.
-
- The AFU is the core part of the accelerator (eg. the compression,
- crypto etc function). The kernel has no knowledge of the function
- of the AFU. Only userspace interacts directly with the AFU.
-
- The PSL provides the translation and interrupt services that the
- AFU needs. This is what the kernel interacts with. For example, if
- the AFU needs to read a particular effective address, it sends
- that address to the PSL, the PSL then translates it, fetches the
- data from memory and returns it to the AFU. If the PSL has a
- translation miss, it interrupts the kernel and the kernel services
- the fault. The context to which this fault is serviced is based on
- who owns that acceleration function.
-
- - POWER8 and PSL Version 8 are compliant to the CAIA Version 1.0.
- - POWER9 and PSL Version 9 are compliant to the CAIA Version 2.0.
-
- This PSL Version 9 provides new features such as:
-
- * Interaction with the nest MMU on the P9 chip.
- * Native DMA support.
- * Supports sending ASB_Notify messages for host thread wakeup.
- * Supports Atomic operations.
- * etc.
-
- Cards with a PSL9 won't work on a POWER8 system and cards with a
- PSL8 won't work on a POWER9 system.
-
-AFU Modes
-=========
-
- There are two programming modes supported by the AFU. Dedicated
- and AFU directed. AFU may support one or both modes.
-
- When using dedicated mode only one MMU context is supported. In
- this mode, only one userspace process can use the accelerator at
- time.
-
- When using AFU directed mode, up to 16K simultaneous contexts can
- be supported. This means up to 16K simultaneous userspace
- applications may use the accelerator (although specific AFUs may
- support fewer). In this mode, the AFU sends a 16 bit context ID
- with each of its requests. This tells the PSL which context is
- associated with each operation. If the PSL can't translate an
- operation, the ID can also be accessed by the kernel so it can
- determine the userspace context associated with an operation.
-
-
-MMIO space
-==========
-
- A portion of the accelerator MMIO space can be directly mapped
- from the AFU to userspace. Either the whole space can be mapped or
- just a per context portion. The hardware is self describing, hence
- the kernel can determine the offset and size of the per context
- portion.
-
-
-Interrupts
-==========
-
- AFUs may generate interrupts that are destined for userspace. These
- are received by the kernel as hardware interrupts and passed onto
- userspace by a read syscall documented below.
-
- Data storage faults and error interrupts are handled by the kernel
- driver.
-
-
-Work Element Descriptor (WED)
-=============================
-
- The WED is a 64-bit parameter passed to the AFU when a context is
- started. Its format is up to the AFU hence the kernel has no
- knowledge of what it represents. Typically it will be the
- effective address of a work queue or status block where the AFU
- and userspace can share control and status information.
-
-
-
-
-User API
-========
-
-1. AFU character devices
-^^^^^^^^^^^^^^^^^^^^^^^^
-
- For AFUs operating in AFU directed mode, two character device
- files will be created. /dev/cxl/afu0.0m will correspond to a
- master context and /dev/cxl/afu0.0s will correspond to a slave
- context. Master contexts have access to the full MMIO space an
- AFU provides. Slave contexts have access to only the per process
- MMIO space an AFU provides.
-
- For AFUs operating in dedicated process mode, the driver will
- only create a single character device per AFU called
- /dev/cxl/afu0.0d. This will have access to the entire MMIO space
- that the AFU provides (like master contexts in AFU directed).
-
- The types described below are defined in include/uapi/misc/cxl.h
-
- The following file operations are supported on both slave and
- master devices.
-
- A userspace library libcxl is available here:
-
- https://github.com/ibm-capi/libcxl
-
- This provides a C interface to this kernel API.
-
-open
-----
-
- Opens the device and allocates a file descriptor to be used with
- the rest of the API.
-
- A dedicated mode AFU only has one context and only allows the
- device to be opened once.
-
- An AFU directed mode AFU can have many contexts, the device can be
- opened once for each context that is available.
-
- When all available contexts are allocated the open call will fail
- and return -ENOSPC.
-
- Note:
- IRQs need to be allocated for each context, which may limit
- the number of contexts that can be created, and therefore
- how many times the device can be opened. The POWER8 CAPP
- supports 2040 IRQs and 3 are used by the kernel, so 2037 are
- left. If 1 IRQ is needed per context, then only 2037
- contexts can be allocated. If 4 IRQs are needed per context,
- then only 2037/4 = 509 contexts can be allocated.
-
-
-ioctl
------
-
- CXL_IOCTL_START_WORK:
- Starts the AFU context and associates it with the current
- process. Once this ioctl is successfully executed, all memory
- mapped into this process is accessible to this AFU context
- using the same effective addresses. No additional calls are
- required to map/unmap memory. The AFU memory context will be
- updated as userspace allocates and frees memory. This ioctl
- returns once the AFU context is started.
-
- Takes a pointer to a struct cxl_ioctl_start_work
-
- ::
-
- struct cxl_ioctl_start_work {
- __u64 flags;
- __u64 work_element_descriptor;
- __u64 amr;
- __s16 num_interrupts;
- __s16 reserved1;
- __s32 reserved2;
- __u64 reserved3;
- __u64 reserved4;
- __u64 reserved5;
- __u64 reserved6;
- };
-
- flags:
- Indicates which optional fields in the structure are
- valid.
-
- work_element_descriptor:
- The Work Element Descriptor (WED) is a 64-bit argument
- defined by the AFU. Typically this is an effective
- address pointing to an AFU specific structure
- describing what work to perform.
-
- amr:
- Authority Mask Register (AMR), same as the powerpc
- AMR. This field is only used by the kernel when the
- corresponding CXL_START_WORK_AMR value is specified in
- flags. If not specified the kernel will use a default
- value of 0.
-
- num_interrupts:
- Number of userspace interrupts to request. This field
- is only used by the kernel when the corresponding
- CXL_START_WORK_NUM_IRQS value is specified in flags.
- If not specified the minimum number required by the
- AFU will be allocated. The min and max number can be
- obtained from sysfs.
-
- reserved fields:
- For ABI padding and future extensions
-
- CXL_IOCTL_GET_PROCESS_ELEMENT:
- Get the current context id, also known as the process element.
- The value is returned from the kernel as a __u32.
-
-
-mmap
-----
-
- An AFU may have an MMIO space to facilitate communication with the
- AFU. If it does, the MMIO space can be accessed via mmap. The size
- and contents of this area are specific to the particular AFU. The
- size can be discovered via sysfs.
-
- In AFU directed mode, master contexts are allowed to map all of
- the MMIO space and slave contexts are allowed to only map the per
- process MMIO space associated with the context. In dedicated
- process mode the entire MMIO space can always be mapped.
-
- This mmap call must be done after the START_WORK ioctl.
-
- Care should be taken when accessing MMIO space. Only 32 and 64-bit
- accesses are supported by POWER8. Also, the AFU will be designed
- with a specific endianness, so all MMIO accesses should consider
- endianness (recommend endian(3) variants like: le64toh(),
- be64toh() etc). These endian issues equally apply to shared memory
- queues the WED may describe.
-
-
-read
-----
-
- Reads events from the AFU. Blocks if no events are pending
- (unless O_NONBLOCK is supplied). Returns -EIO in the case of an
- unrecoverable error or if the card is removed.
-
- read() will always return an integral number of events.
-
- The buffer passed to read() must be at least 4K bytes.
-
- The result of the read will be a buffer of one or more events,
- each event is of type struct cxl_event, of varying size::
-
- struct cxl_event {
- struct cxl_event_header header;
- union {
- struct cxl_event_afu_interrupt irq;
- struct cxl_event_data_storage fault;
- struct cxl_event_afu_error afu_error;
- };
- };
-
- The struct cxl_event_header is defined as
-
- ::
-
- struct cxl_event_header {
- __u16 type;
- __u16 size;
- __u16 process_element;
- __u16 reserved1;
- };
-
- type:
- This defines the type of event. The type determines how
- the rest of the event is structured. These types are
- described below and defined by enum cxl_event_type.
-
- size:
- This is the size of the event in bytes including the
- struct cxl_event_header. The start of the next event can
- be found at this offset from the start of the current
- event.
-
- process_element:
- Context ID of the event.
-
- reserved field:
- For future extensions and padding.
-
- If the event type is CXL_EVENT_AFU_INTERRUPT then the event
- structure is defined as
-
- ::
-
- struct cxl_event_afu_interrupt {
- __u16 flags;
- __u16 irq; /* Raised AFU interrupt number */
- __u32 reserved1;
- };
-
- flags:
- These flags indicate which optional fields are present
- in this struct. Currently all fields are mandatory.
-
- irq:
- The IRQ number sent by the AFU.
-
- reserved field:
- For future extensions and padding.
-
- If the event type is CXL_EVENT_DATA_STORAGE then the event
- structure is defined as
-
- ::
-
- struct cxl_event_data_storage {
- __u16 flags;
- __u16 reserved1;
- __u32 reserved2;
- __u64 addr;
- __u64 dsisr;
- __u64 reserved3;
- };
-
- flags:
- These flags indicate which optional fields are present in
- this struct. Currently all fields are mandatory.
-
- address:
- The address that the AFU unsuccessfully attempted to
- access. Valid accesses will be handled transparently by the
- kernel but invalid accesses will generate this event.
-
- dsisr:
- This field gives information on the type of fault. It is a
- copy of the DSISR from the PSL hardware when the address
- fault occurred. The form of the DSISR is as defined in the
- CAIA.
-
- reserved fields:
- For future extensions
-
- If the event type is CXL_EVENT_AFU_ERROR then the event structure
- is defined as
-
- ::
-
- struct cxl_event_afu_error {
- __u16 flags;
- __u16 reserved1;
- __u32 reserved2;
- __u64 error;
- };
-
- flags:
- These flags indicate which optional fields are present in
- this struct. Currently all fields are Mandatory.
-
- error:
- Error status from the AFU. Defined by the AFU.
-
- reserved fields:
- For future extensions and padding
-
-
-2. Card character device (powerVM guest only)
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
- In a powerVM guest, an extra character device is created for the
- card. The device is only used to write (flash) a new image on the
- FPGA accelerator. Once the image is written and verified, the
- device tree is updated and the card is reset to reload the updated
- image.
-
-open
-----
-
- Opens the device and allocates a file descriptor to be used with
- the rest of the API. The device can only be opened once.
-
-ioctl
------
-
-CXL_IOCTL_DOWNLOAD_IMAGE / CXL_IOCTL_VALIDATE_IMAGE:
- Starts and controls flashing a new FPGA image. Partial
- reconfiguration is not supported (yet), so the image must contain
- a copy of the PSL and AFU(s). Since an image can be quite large,
- the caller may have to iterate, splitting the image in smaller
- chunks.
-
- Takes a pointer to a struct cxl_adapter_image::
-
- struct cxl_adapter_image {
- __u64 flags;
- __u64 data;
- __u64 len_data;
- __u64 len_image;
- __u64 reserved1;
- __u64 reserved2;
- __u64 reserved3;
- __u64 reserved4;
- };
-
- flags:
- These flags indicate which optional fields are present in
- this struct. Currently all fields are mandatory.
-
- data:
- Pointer to a buffer with part of the image to write to the
- card.
-
- len_data:
- Size of the buffer pointed to by data.
-
- len_image:
- Full size of the image.
-
-
-Sysfs Class
-===========
-
- A cxl sysfs class is added under /sys/class/cxl to facilitate
- enumeration and tuning of the accelerators. Its layout is
- described in Documentation/ABI/testing/sysfs-class-cxl
-
-
-Udev rules
-==========
-
- The following udev rules could be used to create a symlink to the
- most logical chardev to use in any programming mode (afuX.Yd for
- dedicated, afuX.Ys for afu directed), since the API is virtually
- identical for each::
-
- SUBSYSTEM=="cxl", ATTRS{mode}=="dedicated_process", SYMLINK="cxl/%b"
- SUBSYSTEM=="cxl", ATTRS{mode}=="afu_directed", \
- KERNEL=="afu[0-9]*.[0-9]*s", SYMLINK="cxl/%b"
diff --git a/Documentation/arch/powerpc/cxlflash.rst b/Documentation/arch/powerpc/cxlflash.rst
deleted file mode 100644
index e8f488acfa41..000000000000
--- a/Documentation/arch/powerpc/cxlflash.rst
+++ /dev/null
@@ -1,433 +0,0 @@
-================================
-Coherent Accelerator (CXL) Flash
-================================
-
-Introduction
-============
-
- The IBM Power architecture provides support for CAPI (Coherent
- Accelerator Power Interface), which is available to certain PCIe slots
- on Power 8 systems. CAPI can be thought of as a special tunneling
- protocol through PCIe that allow PCIe adapters to look like special
- purpose co-processors which can read or write an application's
- memory and generate page faults. As a result, the host interface to
- an adapter running in CAPI mode does not require the data buffers to
- be mapped to the device's memory (IOMMU bypass) nor does it require
- memory to be pinned.
-
- On Linux, Coherent Accelerator (CXL) kernel services present CAPI
- devices as a PCI device by implementing a virtual PCI host bridge.
- This abstraction simplifies the infrastructure and programming
- model, allowing for drivers to look similar to other native PCI
- device drivers.
-
- CXL provides a mechanism by which user space applications can
- directly talk to a device (network or storage) bypassing the typical
- kernel/device driver stack. The CXL Flash Adapter Driver enables a
- user space application direct access to Flash storage.
-
- The CXL Flash Adapter Driver is a kernel module that sits in the
- SCSI stack as a low level device driver (below the SCSI disk and
- protocol drivers) for the IBM CXL Flash Adapter. This driver is
- responsible for the initialization of the adapter, setting up the
- special path for user space access, and performing error recovery. It
- communicates directly the Flash Accelerator Functional Unit (AFU)
- as described in Documentation/arch/powerpc/cxl.rst.
-
- The cxlflash driver supports two, mutually exclusive, modes of
- operation at the device (LUN) level:
-
- - Any flash device (LUN) can be configured to be accessed as a
- regular disk device (i.e.: /dev/sdc). This is the default mode.
-
- - Any flash device (LUN) can be configured to be accessed from
- user space with a special block library. This mode further
- specifies the means of accessing the device and provides for
- either raw access to the entire LUN (referred to as direct
- or physical LUN access) or access to a kernel/AFU-mediated
- partition of the LUN (referred to as virtual LUN access). The
- segmentation of a disk device into virtual LUNs is assisted
- by special translation services provided by the Flash AFU.
-
-Overview
-========
-
- The Coherent Accelerator Interface Architecture (CAIA) introduces a
- concept of a master context. A master typically has special privileges
- granted to it by the kernel or hypervisor allowing it to perform AFU
- wide management and control. The master may or may not be involved
- directly in each user I/O, but at the minimum is involved in the
- initial setup before the user application is allowed to send requests
- directly to the AFU.
-
- The CXL Flash Adapter Driver establishes a master context with the
- AFU. It uses memory mapped I/O (MMIO) for this control and setup. The
- Adapter Problem Space Memory Map looks like this::
-
- +-------------------------------+
- | 512 * 64 KB User MMIO |
- | (per context) |
- | User Accessible |
- +-------------------------------+
- | 512 * 128 B per context |
- | Provisioning and Control |
- | Trusted Process accessible |
- +-------------------------------+
- | 64 KB Global |
- | Trusted Process accessible |
- +-------------------------------+
-
- This driver configures itself into the SCSI software stack as an
- adapter driver. The driver is the only entity that is considered a
- Trusted Process to program the Provisioning and Control and Global
- areas in the MMIO Space shown above. The master context driver
- discovers all LUNs attached to the CXL Flash adapter and instantiates
- scsi block devices (/dev/sdb, /dev/sdc etc.) for each unique LUN
- seen from each path.
-
- Once these scsi block devices are instantiated, an application
- written to a specification provided by the block library may get
- access to the Flash from user space (without requiring a system call).
-
- This master context driver also provides a series of ioctls for this
- block library to enable this user space access. The driver supports
- two modes for accessing the block device.
-
- The first mode is called a virtual mode. In this mode a single scsi
- block device (/dev/sdb) may be carved up into any number of distinct
- virtual LUNs. The virtual LUNs may be resized as long as the sum of
- the sizes of all the virtual LUNs, along with the meta-data associated
- with it does not exceed the physical capacity.
-
- The second mode is called the physical mode. In this mode a single
- block device (/dev/sdb) may be opened directly by the block library
- and the entire space for the LUN is available to the application.
-
- Only the physical mode provides persistence of the data. i.e. The
- data written to the block device will survive application exit and
- restart and also reboot. The virtual LUNs do not persist (i.e. do
- not survive after the application terminates or the system reboots).
-
-
-Block library API
-=================
-
- Applications intending to get access to the CXL Flash from user
- space should use the block library, as it abstracts the details of
- interfacing directly with the cxlflash driver that are necessary for
- performing administrative actions (i.e.: setup, tear down, resize).
- The block library can be thought of as a 'user' of services,
- implemented as IOCTLs, that are provided by the cxlflash driver
- specifically for devices (LUNs) operating in user space access
- mode. While it is not a requirement that applications understand
- the interface between the block library and the cxlflash driver,
- a high-level overview of each supported service (IOCTL) is provided
- below.
-
- The block library can be found on GitHub:
- http://github.com/open-power/capiflash
-
-
-CXL Flash Driver LUN IOCTLs
-===========================
-
- Users, such as the block library, that wish to interface with a flash
- device (LUN) via user space access need to use the services provided
- by the cxlflash driver. As these services are implemented as ioctls,
- a file descriptor handle must first be obtained in order to establish
- the communication channel between a user and the kernel. This file
- descriptor is obtained by opening the device special file associated
- with the scsi disk device (/dev/sdb) that was created during LUN
- discovery. As per the location of the cxlflash driver within the
- SCSI protocol stack, this open is actually not seen by the cxlflash
- driver. Upon successful open, the user receives a file descriptor
- (herein referred to as fd1) that should be used for issuing the
- subsequent ioctls listed below.
-
- The structure definitions for these IOCTLs are available in:
- uapi/scsi/cxlflash_ioctl.h
-
-DK_CXLFLASH_ATTACH
-------------------
-
- This ioctl obtains, initializes, and starts a context using the CXL
- kernel services. These services specify a context id (u16) by which
- to uniquely identify the context and its allocated resources. The
- services additionally provide a second file descriptor (herein
- referred to as fd2) that is used by the block library to initiate
- memory mapped I/O (via mmap()) to the CXL flash device and poll for
- completion events. This file descriptor is intentionally installed by
- this driver and not the CXL kernel services to allow for intermediary
- notification and access in the event of a non-user-initiated close(),
- such as a killed process. This design point is described in further
- detail in the description for the DK_CXLFLASH_DETACH ioctl.
-
- There are a few important aspects regarding the "tokens" (context id
- and fd2) that are provided back to the user:
-
- - These tokens are only valid for the process under which they
- were created. The child of a forked process cannot continue
- to use the context id or file descriptor created by its parent
- (see DK_CXLFLASH_VLUN_CLONE for further details).
-
- - These tokens are only valid for the lifetime of the context and
- the process under which they were created. Once either is
- destroyed, the tokens are to be considered stale and subsequent
- usage will result in errors.
-
- - A valid adapter file descriptor (fd2 >= 0) is only returned on
- the initial attach for a context. Subsequent attaches to an
- existing context (DK_CXLFLASH_ATTACH_REUSE_CONTEXT flag present)
- do not provide the adapter file descriptor as it was previously
- made known to the application.
-
- - When a context is no longer needed, the user shall detach from
- the context via the DK_CXLFLASH_DETACH ioctl. When this ioctl
- returns with a valid adapter file descriptor and the return flag
- DK_CXLFLASH_APP_CLOSE_ADAP_FD is present, the application _must_
- close the adapter file descriptor following a successful detach.
-
- - When this ioctl returns with a valid fd2 and the return flag
- DK_CXLFLASH_APP_CLOSE_ADAP_FD is present, the application _must_
- close fd2 in the following circumstances:
-
- + Following a successful detach of the last user of the context
- + Following a successful recovery on the context's original fd2
- + In the child process of a fork(), following a clone ioctl,
- on the fd2 associated with the source context
-
- - At any time, a close on fd2 will invalidate the tokens. Applications
- should exercise caution to only close fd2 when appropriate (outlined
- in the previous bullet) to avoid premature loss of I/O.
-
-DK_CXLFLASH_USER_DIRECT
------------------------
- This ioctl is responsible for transitioning the LUN to direct
- (physical) mode access and configuring the AFU for direct access from
- user space on a per-context basis. Additionally, the block size and
- last logical block address (LBA) are returned to the user.
-
- As mentioned previously, when operating in user space access mode,
- LUNs may be accessed in whole or in part. Only one mode is allowed
- at a time and if one mode is active (outstanding references exist),
- requests to use the LUN in a different mode are denied.
-
- The AFU is configured for direct access from user space by adding an
- entry to the AFU's resource handle table. The index of the entry is
- treated as a resource handle that is returned to the user. The user
- is then able to use the handle to reference the LUN during I/O.
-
-DK_CXLFLASH_USER_VIRTUAL
-------------------------
- This ioctl is responsible for transitioning the LUN to virtual mode
- of access and configuring the AFU for virtual access from user space
- on a per-context basis. Additionally, the block size and last logical
- block address (LBA) are returned to the user.
-
- As mentioned previously, when operating in user space access mode,
- LUNs may be accessed in whole or in part. Only one mode is allowed
- at a time and if one mode is active (outstanding references exist),
- requests to use the LUN in a different mode are denied.
-
- The AFU is configured for virtual access from user space by adding
- an entry to the AFU's resource handle table. The index of the entry
- is treated as a resource handle that is returned to the user. The
- user is then able to use the handle to reference the LUN during I/O.
-
- By default, the virtual LUN is created with a size of 0. The user
- would need to use the DK_CXLFLASH_VLUN_RESIZE ioctl to adjust the grow
- the virtual LUN to a desired size. To avoid having to perform this
- resize for the initial creation of the virtual LUN, the user has the
- option of specifying a size as part of the DK_CXLFLASH_USER_VIRTUAL
- ioctl, such that when success is returned to the user, the
- resource handle that is provided is already referencing provisioned
- storage. This is reflected by the last LBA being a non-zero value.
-
- When a LUN is accessible from more than one port, this ioctl will
- return with the DK_CXLFLASH_ALL_PORTS_ACTIVE return flag set. This
- provides the user with a hint that I/O can be retried in the event
- of an I/O error as the LUN can be reached over multiple paths.
-
-DK_CXLFLASH_VLUN_RESIZE
------------------------
- This ioctl is responsible for resizing a previously created virtual
- LUN and will fail if invoked upon a LUN that is not in virtual
- mode. Upon success, an updated last LBA is returned to the user
- indicating the new size of the virtual LUN associated with the
- resource handle.
-
- The partitioning of virtual LUNs is jointly mediated by the cxlflash
- driver and the AFU. An allocation table is kept for each LUN that is
- operating in the virtual mode and used to program a LUN translation
- table that the AFU references when provided with a resource handle.
-
- This ioctl can return -EAGAIN if an AFU sync operation takes too long.
- In addition to returning a failure to user, cxlflash will also schedule
- an asynchronous AFU reset. Should the user choose to retry the operation,
- it is expected to succeed. If this ioctl fails with -EAGAIN, the user
- can either retry the operation or treat it as a failure.
-
-DK_CXLFLASH_RELEASE
--------------------
- This ioctl is responsible for releasing a previously obtained
- reference to either a physical or virtual LUN. This can be
- thought of as the inverse of the DK_CXLFLASH_USER_DIRECT or
- DK_CXLFLASH_USER_VIRTUAL ioctls. Upon success, the resource handle
- is no longer valid and the entry in the resource handle table is
- made available to be used again.
-
- As part of the release process for virtual LUNs, the virtual LUN
- is first resized to 0 to clear out and free the translation tables
- associated with the virtual LUN reference.
-
-DK_CXLFLASH_DETACH
-------------------
- This ioctl is responsible for unregistering a context with the
- cxlflash driver and release outstanding resources that were
- not explicitly released via the DK_CXLFLASH_RELEASE ioctl. Upon
- success, all "tokens" which had been provided to the user from the
- DK_CXLFLASH_ATTACH onward are no longer valid.
-
- When the DK_CXLFLASH_APP_CLOSE_ADAP_FD flag was returned on a successful
- attach, the application _must_ close the fd2 associated with the context
- following the detach of the final user of the context.
-
-DK_CXLFLASH_VLUN_CLONE
-----------------------
- This ioctl is responsible for cloning a previously created
- context to a more recently created context. It exists solely to
- support maintaining user space access to storage after a process
- forks. Upon success, the child process (which invoked the ioctl)
- will have access to the same LUNs via the same resource handle(s)
- as the parent, but under a different context.
-
- Context sharing across processes is not supported with CXL and
- therefore each fork must be met with establishing a new context
- for the child process. This ioctl simplifies the state management
- and playback required by a user in such a scenario. When a process
- forks, child process can clone the parents context by first creating
- a context (via DK_CXLFLASH_ATTACH) and then using this ioctl to
- perform the clone from the parent to the child.
-
- The clone itself is fairly simple. The resource handle and lun
- translation tables are copied from the parent context to the child's
- and then synced with the AFU.
-
- When the DK_CXLFLASH_APP_CLOSE_ADAP_FD flag was returned on a successful
- attach, the application _must_ close the fd2 associated with the source
- context (still resident/accessible in the parent process) following the
- clone. This is to avoid a stale entry in the file descriptor table of the
- child process.
-
- This ioctl can return -EAGAIN if an AFU sync operation takes too long.
- In addition to returning a failure to user, cxlflash will also schedule
- an asynchronous AFU reset. Should the user choose to retry the operation,
- it is expected to succeed. If this ioctl fails with -EAGAIN, the user
- can either retry the operation or treat it as a failure.
-
-DK_CXLFLASH_VERIFY
-------------------
- This ioctl is used to detect various changes such as the capacity of
- the disk changing, the number of LUNs visible changing, etc. In cases
- where the changes affect the application (such as a LUN resize), the
- cxlflash driver will report the changed state to the application.
-
- The user calls in when they want to validate that a LUN hasn't been
- changed in response to a check condition. As the user is operating out
- of band from the kernel, they will see these types of events without
- the kernel's knowledge. When encountered, the user's architected
- behavior is to call in to this ioctl, indicating what they want to
- verify and passing along any appropriate information. For now, only
- verifying a LUN change (ie: size different) with sense data is
- supported.
-
-DK_CXLFLASH_RECOVER_AFU
------------------------
- This ioctl is used to drive recovery (if such an action is warranted)
- of a specified user context. Any state associated with the user context
- is re-established upon successful recovery.
-
- User contexts are put into an error condition when the device needs to
- be reset or is terminating. Users are notified of this error condition
- by seeing all 0xF's on an MMIO read. Upon encountering this, the
- architected behavior for a user is to call into this ioctl to recover
- their context. A user may also call into this ioctl at any time to
- check if the device is operating normally. If a failure is returned
- from this ioctl, the user is expected to gracefully clean up their
- context via release/detach ioctls. Until they do, the context they
- hold is not relinquished. The user may also optionally exit the process
- at which time the context/resources they held will be freed as part of
- the release fop.
-
- When the DK_CXLFLASH_APP_CLOSE_ADAP_FD flag was returned on a successful
- attach, the application _must_ unmap and close the fd2 associated with the
- original context following this ioctl returning success and indicating that
- the context was recovered (DK_CXLFLASH_RECOVER_AFU_CONTEXT_RESET).
-
-DK_CXLFLASH_MANAGE_LUN
-----------------------
- This ioctl is used to switch a LUN from a mode where it is available
- for file-system access (legacy), to a mode where it is set aside for
- exclusive user space access (superpipe). In case a LUN is visible
- across multiple ports and adapters, this ioctl is used to uniquely
- identify each LUN by its World Wide Node Name (WWNN).
-
-
-CXL Flash Driver Host IOCTLs
-============================
-
- Each host adapter instance that is supported by the cxlflash driver
- has a special character device associated with it to enable a set of
- host management function. These character devices are hosted in a
- class dedicated for cxlflash and can be accessed via `/dev/cxlflash/*`.
-
- Applications can be written to perform various functions using the
- host ioctl APIs below.
-
- The structure definitions for these IOCTLs are available in:
- uapi/scsi/cxlflash_ioctl.h
-
-HT_CXLFLASH_LUN_PROVISION
--------------------------
- This ioctl is used to create and delete persistent LUNs on cxlflash
- devices that lack an external LUN management interface. It is only
- valid when used with AFUs that support the LUN provision capability.
-
- When sufficient space is available, LUNs can be created by specifying
- the target port to host the LUN and a desired size in 4K blocks. Upon
- success, the LUN ID and WWID of the created LUN will be returned and
- the SCSI bus can be scanned to detect the change in LUN topology. Note
- that partial allocations are not supported. Should a creation fail due
- to a space issue, the target port can be queried for its current LUN
- geometry.
-
- To remove a LUN, the device must first be disassociated from the Linux
- SCSI subsystem. The LUN deletion can then be initiated by specifying a
- target port and LUN ID. Upon success, the LUN geometry associated with
- the port will be updated to reflect new number of provisioned LUNs and
- available capacity.
-
- To query the LUN geometry of a port, the target port is specified and
- upon success, the following information is presented:
-
- - Maximum number of provisioned LUNs allowed for the port
- - Current number of provisioned LUNs for the port
- - Maximum total capacity of provisioned LUNs for the port (4K blocks)
- - Current total capacity of provisioned LUNs for the port (4K blocks)
-
- With this information, the number of available LUNs and capacity can be
- can be calculated.
-
-HT_CXLFLASH_AFU_DEBUG
----------------------
- This ioctl is used to debug AFUs by supporting a command pass-through
- interface. It is only valid when used with AFUs that support the AFU
- debug capability.
-
- With exception of buffer management, AFU debug commands are opaque to
- cxlflash and treated as pass-through. For debug commands that do require
- data transfer, the user supplies an adequately sized data buffer and must
- specify the data transfer direction with respect to the host. There is a
- maximum transfer size of 256K imposed. Note that partial read completions
- are not supported - when errors are experienced with a host read data
- transfer, the data buffer is not copied back to the user.
diff --git a/Documentation/arch/powerpc/dexcr.rst b/Documentation/arch/powerpc/dexcr.rst
index 615a631f51fa..ab0724212fcd 100644
--- a/Documentation/arch/powerpc/dexcr.rst
+++ b/Documentation/arch/powerpc/dexcr.rst
@@ -36,8 +36,145 @@ state for a process.
Configuration
=============
-The DEXCR is currently unconfigurable. All threads are run with the
-NPHIE aspect enabled.
+prctl
+-----
+
+A process can control its own userspace DEXCR value using the
+``PR_PPC_GET_DEXCR`` and ``PR_PPC_SET_DEXCR`` pair of
+:manpage:`prctl(2)` commands. These calls have the form::
+
+ prctl(PR_PPC_GET_DEXCR, unsigned long which, 0, 0, 0);
+ prctl(PR_PPC_SET_DEXCR, unsigned long which, unsigned long ctrl, 0, 0);
+
+The possible 'which' and 'ctrl' values are as follows. Note there is no relation
+between the 'which' value and the DEXCR aspect's index.
+
+.. flat-table::
+ :header-rows: 1
+ :widths: 2 7 1
+
+ * - ``prctl()`` which
+ - Aspect name
+ - Aspect index
+
+ * - ``PR_PPC_DEXCR_SBHE``
+ - Speculative Branch Hint Enable (SBHE)
+ - 0
+
+ * - ``PR_PPC_DEXCR_IBRTPD``
+ - Indirect Branch Recurrent Target Prediction Disable (IBRTPD)
+ - 3
+
+ * - ``PR_PPC_DEXCR_SRAPD``
+ - Subroutine Return Address Prediction Disable (SRAPD)
+ - 4
+
+ * - ``PR_PPC_DEXCR_NPHIE``
+ - Non-Privileged Hash Instruction Enable (NPHIE)
+ - 5
+
+.. flat-table::
+ :header-rows: 1
+ :widths: 2 8
+
+ * - ``prctl()`` ctrl
+ - Meaning
+
+ * - ``PR_PPC_DEXCR_CTRL_EDITABLE``
+ - This aspect can be configured with PR_PPC_SET_DEXCR (get only)
+
+ * - ``PR_PPC_DEXCR_CTRL_SET``
+ - This aspect is set / set this aspect
+
+ * - ``PR_PPC_DEXCR_CTRL_CLEAR``
+ - This aspect is clear / clear this aspect
+
+ * - ``PR_PPC_DEXCR_CTRL_SET_ONEXEC``
+ - This aspect will be set after exec / set this aspect after exec
+
+ * - ``PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC``
+ - This aspect will be clear after exec / clear this aspect after exec
+
+Note that
+
+* which is a plain value, not a bitmask. Aspects must be worked with individually.
+
+* ctrl is a bitmask. ``PR_PPC_GET_DEXCR`` returns both the current and onexec
+ configuration. For example, ``PR_PPC_GET_DEXCR`` may return
+ ``PR_PPC_DEXCR_CTRL_EDITABLE | PR_PPC_DEXCR_CTRL_SET |
+ PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC``. This would indicate the aspect is currently
+ set, it will be cleared when you run exec, and you can change this with the
+ ``PR_PPC_SET_DEXCR`` prctl.
+
+* The set/clear terminology refers to setting/clearing the bit in the DEXCR.
+ For example::
+
+ prctl(PR_PPC_SET_DEXCR, PR_PPC_DEXCR_IBRTPD, PR_PPC_DEXCR_CTRL_SET, 0, 0);
+
+ will set the IBRTPD aspect bit in the DEXCR, causing indirect branch prediction
+ to be disabled.
+
+* The status returned by ``PR_PPC_GET_DEXCR`` represents what value the process
+ would like applied. It does not include any alternative overrides, such as if
+ the hypervisor is enforcing the aspect be set. To see the true DEXCR state
+ software should read the appropriate SPRs directly.
+
+* The aspect state when starting a process is copied from the parent's state on
+ :manpage:`fork(2)`. The state is reset to a fixed value on
+ :manpage:`execve(2)`. The PR_PPC_SET_DEXCR prctl() can control both of these
+ values.
+
+* The ``*_ONEXEC`` controls do not change the current process's DEXCR.
+
+Use ``PR_PPC_SET_DEXCR`` with one of ``PR_PPC_DEXCR_CTRL_SET`` or
+``PR_PPC_DEXCR_CTRL_CLEAR`` to edit a given aspect.
+
+Common error codes for both getting and setting the DEXCR are as follows:
+
+.. flat-table::
+ :header-rows: 1
+ :widths: 2 8
+
+ * - Error
+ - Meaning
+
+ * - ``EINVAL``
+ - The DEXCR is not supported by the kernel.
+
+ * - ``ENODEV``
+ - The aspect is not recognised by the kernel or not supported by the
+ hardware.
+
+``PR_PPC_SET_DEXCR`` may also report the following error codes:
+
+.. flat-table::
+ :header-rows: 1
+ :widths: 2 8
+
+ * - Error
+ - Meaning
+
+ * - ``EINVAL``
+ - The ctrl value contains unrecognised flags.
+
+ * - ``EINVAL``
+ - The ctrl value contains mutually conflicting flags (e.g.,
+ ``PR_PPC_DEXCR_CTRL_SET | PR_PPC_DEXCR_CTRL_CLEAR``)
+
+ * - ``EPERM``
+ - This aspect cannot be modified with prctl() (check for the
+ PR_PPC_DEXCR_CTRL_EDITABLE flag with PR_PPC_GET_DEXCR).
+
+ * - ``EPERM``
+ - The process does not have sufficient privilege to perform the operation.
+ For example, clearing NPHIE on exec is a privileged operation (a process
+ can still clear its own NPHIE aspect without privileges).
+
+This interface allows a process to control its own DEXCR aspects, and also set
+the initial DEXCR value for any children in its process tree (up to the next
+child to use an ``*_ONEXEC`` control). This allows fine-grained control over the
+default value of the DEXCR, for example allowing containers to run with different
+default values.
coredump and ptrace
diff --git a/Documentation/arch/powerpc/eeh-pci-error-recovery.rst b/Documentation/arch/powerpc/eeh-pci-error-recovery.rst
index d6643a91bdf8..153d0af055b6 100644
--- a/Documentation/arch/powerpc/eeh-pci-error-recovery.rst
+++ b/Documentation/arch/powerpc/eeh-pci-error-recovery.rst
@@ -315,7 +315,6 @@ network daemons and file systems that didn't need to be disturbed.
ideally, the reset should happen at or below the block layer,
so that the file systems are not disturbed.
- Reiserfs does not tolerate errors returned from the block device.
Ext3fs seems to be tolerant, retrying reads/writes until it does
succeed. Both have been only lightly tested in this scenario.
diff --git a/Documentation/arch/powerpc/elf_hwcaps.rst b/Documentation/arch/powerpc/elf_hwcaps.rst
index 4c896cf077c2..fce7489877b5 100644
--- a/Documentation/arch/powerpc/elf_hwcaps.rst
+++ b/Documentation/arch/powerpc/elf_hwcaps.rst
@@ -91,6 +91,7 @@ PPC_FEATURE_HAS_MMU
PPC_FEATURE_HAS_4xxMAC
The processor is 40x or 44x family.
+ Unused in the kernel since 732b32daef80 ("powerpc: Remove core support for 40x")
PPC_FEATURE_UNIFIED_CACHE
The processor has a unified L1 cache for instructions and data, as
diff --git a/Documentation/arch/powerpc/firmware-assisted-dump.rst b/Documentation/arch/powerpc/firmware-assisted-dump.rst
index e363fc48529a..7e266e749cd5 100644
--- a/Documentation/arch/powerpc/firmware-assisted-dump.rst
+++ b/Documentation/arch/powerpc/firmware-assisted-dump.rst
@@ -120,6 +120,28 @@ to ensure that crash data is preserved to process later.
e.g.
# echo 1 > /sys/firmware/opal/mpipl/release_core
+-- Support for Additional Kernel Arguments in Fadump
+ Fadump has a feature that allows passing additional kernel arguments
+ to the fadump kernel. This feature was primarily designed to disable
+ kernel functionalities that are not required for the fadump kernel
+ and to reduce its memory footprint while collecting the dump.
+
+ Command to Add Additional Kernel Parameters to Fadump:
+ e.g.
+ # echo "nr_cpus=16" > /sys/kernel/fadump/bootargs_append
+
+ The above command is sufficient to add additional arguments to fadump.
+ An explicit service restart is not required.
+
+ Command to Retrieve the Additional Fadump Arguments:
+ e.g.
+ # cat /sys/kernel/fadump/bootargs_append
+
+Note: Additional kernel arguments for fadump with HASH MMU is only
+ supported if the RMA size is greater than 768 MB. If the RMA
+ size is less than 768 MB, the kernel does not export the
+ /sys/kernel/fadump/bootargs_append sysfs node.
+
Implementation details:
-----------------------
@@ -134,12 +156,12 @@ that are run. If there is dump data, then the
memory is held.
If there is no waiting dump data, then only the memory required to
-hold CPU state, HPTE region, boot memory dump, FADump header and
-elfcore header, is usually reserved at an offset greater than boot
-memory size (see Fig. 1). This area is *not* released: this region
-will be kept permanently reserved, so that it can act as a receptacle
-for a copy of the boot memory content in addition to CPU state and
-HPTE region, in the case a crash does occur.
+hold CPU state, HPTE region, boot memory dump, and FADump header is
+usually reserved at an offset greater than boot memory size (see Fig. 1).
+This area is *not* released: this region will be kept permanently
+reserved, so that it can act as a receptacle for a copy of the boot
+memory content in addition to CPU state and HPTE region, in the case
+a crash does occur.
Since this reserved memory area is used only after the system crash,
there is no point in blocking this significant chunk of memory from
@@ -153,22 +175,22 @@ that were present in CMA region::
o Memory Reservation during first kernel
- Low memory Top of memory
- 0 boot memory size |<--- Reserved dump area --->| |
- | | | Permanent Reservation | |
- V V | | V
- +-----------+-----/ /---+---+----+-------+-----+-----+----+--+
- | | |///|////| DUMP | HDR | ELF |////| |
- +-----------+-----/ /---+---+----+-------+-----+-----+----+--+
- | ^ ^ ^ ^ ^
- | | | | | |
- \ CPU HPTE / | |
- ------------------------------ | |
- Boot memory content gets transferred | |
- to reserved area by firmware at the | |
- time of crash. | |
- FADump Header |
- (meta area) |
+ Low memory Top of memory
+ 0 boot memory size |<------ Reserved dump area ----->| |
+ | | | Permanent Reservation | |
+ V V | | V
+ +-----------+-----/ /---+---+----+-----------+-------+----+-----+
+ | | |///|////| DUMP | HDR |////| |
+ +-----------+-----/ /---+---+----+-----------+-------+----+-----+
+ | ^ ^ ^ ^ ^
+ | | | | | |
+ \ CPU HPTE / | |
+ -------------------------------- | |
+ Boot memory content gets transferred | |
+ to reserved area by firmware at the | |
+ time of crash. | |
+ FADump Header |
+ (meta area) |
|
|
Metadata: This area holds a metadata structure whose
@@ -186,13 +208,20 @@ that were present in CMA region::
0 boot memory size |
| |<------------ Crash preserved area ------------>|
V V |<--- Reserved dump area --->| |
- +-----------+-----/ /---+---+----+-------+-----+-----+----+--+
- | | |///|////| DUMP | HDR | ELF |////| |
- +-----------+-----/ /---+---+----+-------+-----+-----+----+--+
- | |
- V V
- Used by second /proc/vmcore
- kernel to boot
+ +----+---+--+-----/ /---+---+----+-------+-----+-----+-------+
+ | |ELF| | |///|////| DUMP | HDR |/////| |
+ +----+---+--+-----/ /---+---+----+-------+-----+-----+-------+
+ | | | | | |
+ ----- ------------------------------ ---------------
+ \ | |
+ \ | |
+ \ | |
+ \ | ----------------------------
+ \ | /
+ \ | /
+ \ | /
+ /proc/vmcore
+
+---+
|///| -> Regions (CPU, HPTE & Metadata) marked like this in the above
@@ -200,6 +229,12 @@ that were present in CMA region::
does not have CPU & HPTE regions while Metadata region is
not supported on pSeries currently.
+ +---+
+ |ELF| -> elfcorehdr, it is created in second kernel after crash.
+ +---+
+
+ Note: Memory from 0 to the boot memory size is used by second kernel
+
Fig. 2
@@ -353,26 +388,6 @@ TODO:
- Need to come up with the better approach to find out more
accurate boot memory size that is required for a kernel to
boot successfully when booted with restricted memory.
- - The FADump implementation introduces a FADump crash info structure
- in the scratch area before the ELF core header. The idea of introducing
- this structure is to pass some important crash info data to the second
- kernel which will help second kernel to populate ELF core header with
- correct data before it gets exported through /proc/vmcore. The current
- design implementation does not address a possibility of introducing
- additional fields (in future) to this structure without affecting
- compatibility. Need to come up with the better approach to address this.
-
- The possible approaches are:
-
- 1. Introduce version field for version tracking, bump up the version
- whenever a new field is added to the structure in future. The version
- field can be used to find out what fields are valid for the current
- version of the structure.
- 2. Reserve the area of predefined size (say PAGE_SIZE) for this
- structure and have unused area as reserved (initialized to zero)
- for future field additions.
-
- The advantage of approach 1 over 2 is we don't need to reserve extra space.
Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
diff --git a/Documentation/arch/powerpc/htm.rst b/Documentation/arch/powerpc/htm.rst
new file mode 100644
index 000000000000..fcb4eb6306b1
--- /dev/null
+++ b/Documentation/arch/powerpc/htm.rst
@@ -0,0 +1,104 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. _htm:
+
+===================================
+HTM (Hardware Trace Macro)
+===================================
+
+Athira Rajeev, 2 Mar 2025
+
+.. contents::
+ :depth: 3
+
+
+Basic overview
+==============
+
+H_HTM is used as an interface for executing Hardware Trace Macro (HTM)
+functions, including setup, configuration, control and dumping of the HTM data.
+For using HTM, it is required to setup HTM buffers and HTM operations can
+be controlled using the H_HTM hcall. The hcall can be invoked for any core/chip
+of the system from within a partition itself. To use this feature, a debugfs
+folder called "htmdump" is present under /sys/kernel/debug/powerpc.
+
+
+HTM debugfs example usage
+=========================
+
+.. code-block:: sh
+
+ # ls /sys/kernel/debug/powerpc/htmdump/
+ coreindexonchip htmcaps htmconfigure htmflags htminfo htmsetup
+ htmstart htmstatus htmtype nodalchipindex nodeindex trace
+
+Details on each file:
+
+* nodeindex, nodalchipindex, coreindexonchip specifies which partition to configure the HTM for.
+* htmtype: specifies the type of HTM. Supported target is hardwareTarget.
+* trace: is to read the HTM data.
+* htmconfigure: Configure/Deconfigure the HTM. Writing 1 to the file will configure the trace, writing 0 to the file will do deconfigure.
+* htmstart: start/Stop the HTM. Writing 1 to the file will start the tracing, writing 0 to the file will stop the tracing.
+* htmstatus: get the status of HTM. This is needed to understand the HTM state after each operation.
+* htmsetup: set the HTM buffer size. Size of HTM buffer is in power of 2
+* htminfo: provides the system processor configuration details. This is needed to understand the appropriate values for nodeindex, nodalchipindex, coreindexonchip.
+* htmcaps : provides the HTM capabilities like minimum/maximum buffer size, what kind of tracing the HTM supports etc.
+* htmflags : allows to pass flags to hcall. Currently supports controlling the wrapping of HTM buffer.
+
+To see the system processor configuration details:
+
+.. code-block:: sh
+
+ # cat /sys/kernel/debug/powerpc/htmdump/htminfo > htminfo_file
+
+The result can be interpreted using hexdump.
+
+To collect HTM traces for a partition represented by nodeindex as
+zero, nodalchipindex as 1 and coreindexonchip as 12
+
+.. code-block:: sh
+
+ # cd /sys/kernel/debug/powerpc/htmdump/
+ # echo 2 > htmtype
+ # echo 33 > htmsetup ( sets 8GB memory for HTM buffer, number is size in power of 2 )
+
+This requires a CEC reboot to get the HTM buffers allocated.
+
+.. code-block:: sh
+
+ # cd /sys/kernel/debug/powerpc/htmdump/
+ # echo 2 > htmtype
+ # echo 0 > nodeindex
+ # echo 1 > nodalchipindex
+ # echo 12 > coreindexonchip
+ # echo 1 > htmflags # to set noWrap for HTM buffers
+ # echo 1 > htmconfigure # Configure the HTM
+ # echo 1 > htmstart # Start the HTM
+ # echo 0 > htmstart # Stop the HTM
+ # echo 0 > htmconfigure # Deconfigure the HTM
+ # cat htmstatus # Dump the status of HTM entries as data
+
+Above will set the htmtype and core details, followed by executing respective HTM operation.
+
+Read the HTM trace data
+========================
+
+After starting the trace collection, run the workload
+of interest. Stop the trace collection after required period
+of time, and read the trace file.
+
+.. code-block:: sh
+
+ # cat /sys/kernel/debug/powerpc/htmdump/trace > trace_file
+
+This trace file will contain the relevant instruction traces
+collected during the workload execution. And can be used as
+input file for trace decoders to understand data.
+
+Benefits of using HTM debugfs interface
+=======================================
+
+It is now possible to collect traces for a particular core/chip
+from within any partition of the system and decode it. Through
+this enablement, a small partition can be dedicated to collect the
+trace data and analyze to provide important information for Performance
+analysis, Software tuning, or Hardware debug.
diff --git a/Documentation/arch/powerpc/index.rst b/Documentation/arch/powerpc/index.rst
index 9749f6dc258f..1be2ee3f0361 100644
--- a/Documentation/arch/powerpc/index.rst
+++ b/Documentation/arch/powerpc/index.rst
@@ -12,8 +12,6 @@ powerpc
bootwrapper
cpu_families
cpu_features
- cxl
- cxlflash
dawr-power9
dexcr
dscr
@@ -21,6 +19,7 @@ powerpc
elf_hwcaps
elfnote
firmware-assisted-dump
+ htm
hvcs
imc
isa-versions
@@ -38,6 +37,7 @@ powerpc
vas-api
vcpudispatch_stats
vmemmap_dedup
+ vpa-dtl
features
diff --git a/Documentation/arch/powerpc/kvm-nested.rst b/Documentation/arch/powerpc/kvm-nested.rst
index 630602a8aa00..574592505604 100644
--- a/Documentation/arch/powerpc/kvm-nested.rst
+++ b/Documentation/arch/powerpc/kvm-nested.rst
@@ -208,13 +208,9 @@ associated values for each ID in the GSB::
flags:
Bit 0: getGuestWideState: Request state of the Guest instead
of an individual VCPU.
- Bit 1: takeOwnershipOfVcpuState Indicate the L1 is taking
- over ownership of the VCPU state and that the L0 can free
- the storage holding the state. The VCPU state will need to
- be returned to the Hypervisor via H_GUEST_SET_STATE prior
- to H_GUEST_RUN_VCPU being called for this VCPU. The data
- returned in the dataBuffer is in a Hypervisor internal
- format.
+ Bit 1: getHostWideState: Request stats of the Host. This causes
+ the guestId and vcpuId parameters to be ignored and attempting
+ to get the VCPU/Guest state will cause an error.
Bits 2-63: Reserved
guestId: ID obtained from H_GUEST_CREATE
vcpuId: ID of the vCPU pass to H_GUEST_CREATE_VCPU
@@ -406,9 +402,10 @@ the partition like the timebase offset and partition scoped page
table information.
+--------+-------+----+--------+----------------------------------+
-| ID | Size | RW | Thread | Details |
-| | Bytes | | Guest | |
-| | | | Scope | |
+| ID | Size | RW |(H)ost | Details |
+| | Bytes | |(G)uest | |
+| | | |(T)hread| |
+| | | |Scope | |
+========+=======+====+========+==================================+
| 0x0000 | | RW | TG | NOP element |
+--------+-------+----+--------+----------------------------------+
@@ -434,6 +431,29 @@ table information.
| | | | |- 0x8 Table size. |
+--------+-------+----+--------+----------------------------------+
| 0x0007-| | | | Reserved |
+| 0x07FF | | | | |
++--------+-------+----+--------+----------------------------------+
+| 0x0800 | 0x08 | R | H | Current usage in bytes of the |
+| | | | | L0's Guest Management Space |
+| | | | | for an L1-Lpar. |
++--------+-------+----+--------+----------------------------------+
+| 0x0801 | 0x08 | R | H | Max bytes available in the |
+| | | | | L0's Guest Management Space for |
+| | | | | an L1-Lpar |
++--------+-------+----+--------+----------------------------------+
+| 0x0802 | 0x08 | R | H | Current usage in bytes of the |
+| | | | | L0's Guest Page Table Management |
+| | | | | Space for an L1-Lpar |
++--------+-------+----+--------+----------------------------------+
+| 0x0803 | 0x08 | R | H | Max bytes available in the L0's |
+| | | | | Guest Page Table Management |
+| | | | | Space for an L1-Lpar |
++--------+-------+----+--------+----------------------------------+
+| 0x0804 | 0x08 | R | H | Cumulative Reclaimed bytes from |
+| | | | | L0 Guest's Page Table Management |
+| | | | | Space due to overcommit |
++--------+-------+----+--------+----------------------------------+
+| 0x0805-| | | | Reserved |
| 0x0BFF | | | | |
+--------+-------+----+--------+----------------------------------+
| 0x0C00 | 0x10 | RW | T |Run vCPU Input Buffer: |
@@ -546,7 +566,9 @@ table information.
+--------+-------+----+--------+----------------------------------+
| 0x1052 | 0x08 | RW | T | CTRL |
+--------+-------+----+--------+----------------------------------+
-| 0x1053-| | | | Reserved |
+| 0x1053 | 0x08 | RW | T | DPDES |
++--------+-------+----+--------+----------------------------------+
+| 0x1054-| | | | Reserved |
| 0x1FFF | | | | |
+--------+-------+----+--------+----------------------------------+
| 0x2000 | 0x04 | RW | T | CR |
diff --git a/Documentation/arch/powerpc/papr_hcalls.rst b/Documentation/arch/powerpc/papr_hcalls.rst
index 80d2c0aadab5..805e1cb9bab9 100644
--- a/Documentation/arch/powerpc/papr_hcalls.rst
+++ b/Documentation/arch/powerpc/papr_hcalls.rst
@@ -289,6 +289,17 @@ to be issued multiple times in order to be completely serviced. The
subsequent hcalls to the hypervisor until the hcall is completely serviced
at which point H_SUCCESS or other error is returned by the hypervisor.
+**H_HTM**
+
+| Input: flags, target, operation (op), op-param1, op-param2, op-param3
+| Out: *dumphtmbufferdata*
+| Return Value: *H_Success,H_Busy,H_LongBusyOrder,H_Partial,H_Parameter,
+ H_P2,H_P3,H_P4,H_P5,H_P6,H_State,H_Not_Available,H_Authority*
+
+H_HTM supports setup, configuration, control and dumping of Hardware Trace
+Macro (HTM) function and its data. HTM buffer stores tracing data for functions
+like core instruction, core LLAT and nest.
+
References
==========
.. [1] "Power Architecture Platform Reference"
diff --git a/Documentation/arch/powerpc/ultravisor.rst b/Documentation/arch/powerpc/ultravisor.rst
index ba6b1bf1cc44..6d0407b2f5a1 100644
--- a/Documentation/arch/powerpc/ultravisor.rst
+++ b/Documentation/arch/powerpc/ultravisor.rst
@@ -134,7 +134,7 @@ Hardware
* PTCR and partition table entries (partition table is in secure
memory). An attempt to write to PTCR will cause a Hypervisor
- Emulation Assitance interrupt.
+ Emulation Assistance interrupt.
* LDBAR (LD Base Address Register) and IMC (In-Memory Collection)
non-architected registers. An attempt to write to them will cause a
diff --git a/Documentation/arch/powerpc/vpa-dtl.rst b/Documentation/arch/powerpc/vpa-dtl.rst
new file mode 100644
index 000000000000..58d0022f993a
--- /dev/null
+++ b/Documentation/arch/powerpc/vpa-dtl.rst
@@ -0,0 +1,156 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. _vpa-dtl:
+
+===================================
+DTL (Dispatch Trace Log)
+===================================
+
+Athira Rajeev, 19 April 2025
+
+.. contents::
+ :depth: 3
+
+
+Basic overview
+==============
+
+The pseries Shared Processor Logical Partition(SPLPAR) machines can
+retrieve a log of dispatch and preempt events from the hypervisor
+using data from Disptach Trace Log(DTL) buffer. With this information,
+user can retrieve when and why each dispatch & preempt has occurred.
+The vpa-dtl PMU exposes the Virtual Processor Area(VPA) DTL counters
+via perf.
+
+Infrastructure used
+===================
+
+The VPA DTL PMU counters do not interrupt on overflow or generate any
+PMI interrupts. Therefore, hrtimer is used to poll the DTL data. The timer
+nterval can be provided by user via sample_period field in nano seconds.
+vpa dtl pmu has one hrtimer added per vpa-dtl pmu thread. DTL (Dispatch
+Trace Log) contains information about dispatch/preempt, enqueue time etc.
+We directly copy the DTL buffer data as part of auxiliary buffer and it
+will be processed later. This will avoid time taken to create samples
+in the kernel space. The PMU driver collecting Dispatch Trace Log (DTL)
+entries makes use of AUX support in perf infrastructure. On the tools side,
+this data is made available as PERF_RECORD_AUXTRACE records.
+
+To correlate each DTL entry with other events across CPU's, an auxtrace_queue
+is created for each CPU. Each auxtrace queue has a array/list of auxtrace buffers.
+All auxtrace queues is maintained in auxtrace heap. The queues are sorted
+based on timestamp. When the different PERF_RECORD_XX records are processed,
+compare the timestamp of perf record with timestamp of top element in the
+auxtrace heap so that DTL events can be co-related with other events
+Process the auxtrace queue if the timestamp of element from heap is
+lower than timestamp from entry in perf record. Sometimes it could happen that
+one buffer is only partially processed. if the timestamp of occurrence of
+another event is more than currently processed element in the queue, it will
+move on to next perf record. So keep track of position of buffer to continue
+processing next time. Update the timestamp of the auxtrace heap with the timestamp
+of last processed entry from the auxtrace buffer.
+
+This infrastructure ensures dispatch trace log entries can be correlated
+and presented along with other events like sched.
+
+vpa-dtl PMU example usage
+=========================
+
+.. code-block:: sh
+
+ # ls /sys/devices/vpa_dtl/
+ events format perf_event_mux_interval_ms power subsystem type uevent
+
+
+To capture the DTL data using perf record:
+.. code-block:: sh
+
+ # ./perf record -a -e sched:\*,vpa_dtl/dtl_all/ -c 1000000000 sleep 1
+
+The result can be interpreted using perf record. Snippet of perf report -D
+
+.. code-block:: sh
+
+ # ./perf report -D
+
+There are different PERF_RECORD_XX records. In that records corresponding to
+auxtrace buffers includes:
+
+1. PERF_RECORD_AUX
+ Conveys that new data is available in AUX area
+
+2. PERF_RECORD_AUXTRACE_INFO
+ Describes offset and size of auxtrace data in the buffers
+
+3. PERF_RECORD_AUXTRACE
+ This is the record that defines the auxtrace data which here in case of
+ vpa-dtl pmu is dispatch trace log data.
+
+Snippet from perf report -D showing the PERF_RECORD_AUXTRACE dump
+
+.. code-block:: sh
+
+0 0 0x39b10 [0x30]: PERF_RECORD_AUXTRACE size: 0x690 offset: 0 ref: 0 idx: 0 tid: -1 cpu: 0
+.
+. ... VPA DTL PMU data: size 1680 bytes, entries is 35
+. 00000000: boot_tb: 21349649546353231, tb_freq: 512000000
+. 00000030: dispatch_reason:decrementer interrupt, preempt_reason:H_CEDE, enqueue_to_dispatch_time:7064, ready_to_enqueue_time:187, waiting_to_ready_time:6611773
+. 00000060: dispatch_reason:priv doorbell, preempt_reason:H_CEDE, enqueue_to_dispatch_time:146, ready_to_enqueue_time:0, waiting_to_ready_time:15359437
+. 00000090: dispatch_reason:decrementer interrupt, preempt_reason:H_CEDE, enqueue_to_dispatch_time:4868, ready_to_enqueue_time:232, waiting_to_ready_time:5100709
+. 000000c0: dispatch_reason:priv doorbell, preempt_reason:H_CEDE, enqueue_to_dispatch_time:179, ready_to_enqueue_time:0, waiting_to_ready_time:30714243
+. 000000f0: dispatch_reason:priv doorbell, preempt_reason:H_CEDE, enqueue_to_dispatch_time:197, ready_to_enqueue_time:0, waiting_to_ready_time:15350648
+. 00000120: dispatch_reason:priv doorbell, preempt_reason:H_CEDE, enqueue_to_dispatch_time:213, ready_to_enqueue_time:0, waiting_to_ready_time:15353446
+. 00000150: dispatch_reason:priv doorbell, preempt_reason:H_CEDE, enqueue_to_dispatch_time:212, ready_to_enqueue_time:0, waiting_to_ready_time:15355126
+. 00000180: dispatch_reason:decrementer interrupt, preempt_reason:H_CEDE, enqueue_to_dispatch_time:6368, ready_to_enqueue_time:164, waiting_to_ready_time:5104665
+
+Above is representation of dtl entry of below format:
+
+struct dtl_entry {
+ u8 dispatch_reason;
+ u8 preempt_reason;
+ u16 processor_id;
+ u32 enqueue_to_dispatch_time;
+ u32 ready_to_enqueue_time;
+ u32 waiting_to_ready_time;
+ u64 timebase;
+ u64 fault_addr;
+ u64 srr0;
+ u64 srr1;
+
+};
+
+First two fields represent the dispatch reason and preempt reason. The post
+processing of PERF_RECORD_AUXTRACE records will translate to meaningful data
+for user to consume.
+
+Visualize the dispatch trace log entries with perf report
+=========================================================
+
+.. code-block:: sh
+
+ # ./perf record -a -e sched:*,vpa_dtl/dtl_all/ -c 1000000000 sleep 1
+ [ perf record: Woken up 1 times to write data ]
+ [ perf record: Captured and wrote 0.300 MB perf.data ]
+
+ # ./perf report
+ # Samples: 321 of event 'vpa-dtl'
+ # Event count (approx.): 321
+ #
+ # Children Self Command Shared Object Symbol
+ # ........ ........ ....... ................. ..............................
+ #
+ 100.00% 100.00% swapper [kernel.kallsyms] [k] plpar_hcall_norets_notrace
+
+Visualize the dispatch trace log entries with perf script
+=========================================================
+
+.. code-block:: sh
+
+ # ./perf script
+ migration/9 67 [009] 105373.359903: sched:sched_waking: comm=perf pid=13418 prio=120 target_cpu=009
+ migration/9 67 [009] 105373.359904: sched:sched_migrate_task: comm=perf pid=13418 prio=120 orig_cpu=9 dest_cpu=10
+ migration/9 67 [009] 105373.359907: sched:sched_stat_runtime: comm=migration/9 pid=67 runtime=4050 [ns]
+ migration/9 67 [009] 105373.359908: sched:sched_switch: prev_comm=migration/9 prev_pid=67 prev_prio=0 prev_state=S ==> next_comm=swapper/9 next_pid=0 next_prio=120
+ :256 256 [016] 105373.359913: vpa-dtl: timebase: 21403600706628832 dispatch_reason:decrementer interrupt, preempt_reason:H_CEDE, enqueue_to_dispatch_time:4854, ready_to_enqueue_time:139, waiting_to_ready_time:511842115 c0000000000fcd28 plpar_hcall_norets_notrace+0x18 ([kernel.kallsyms])
+ :256 256 [017] 105373.360012: vpa-dtl: timebase: 21403600706679454 dispatch_reason:priv doorbell, preempt_reason:H_CEDE, enqueue_to_dispatch_time:236, ready_to_enqueue_time:0, waiting_to_ready_time:133864583 c0000000000fcd28 plpar_hcall_norets_notrace+0x18 ([kernel.kallsyms])
+ perf 13418 [010] 105373.360048: sched:sched_stat_runtime: comm=perf pid=13418 runtime=139748 [ns]
+ perf 13418 [010] 105373.360052: sched:sched_waking: comm=migration/10 pid=72 prio=0 target_cpu=010
diff --git a/Documentation/arch/riscv/cmodx.rst b/Documentation/arch/riscv/cmodx.rst
new file mode 100644
index 000000000000..40ba53bed5df
--- /dev/null
+++ b/Documentation/arch/riscv/cmodx.rst
@@ -0,0 +1,130 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==============================================================================
+Concurrent Modification and Execution of Instructions (CMODX) for RISC-V Linux
+==============================================================================
+
+CMODX is a programming technique where a program executes instructions that were
+modified by the program itself. Instruction storage and the instruction cache
+(icache) are not guaranteed to be synchronized on RISC-V hardware. Therefore, the
+program must enforce its own synchronization with the unprivileged fence.i
+instruction.
+
+CMODX in the Kernel Space
+-------------------------
+
+Dynamic ftrace
+---------------------
+
+Essentially, dynamic ftrace directs the control flow by inserting a function
+call at each patchable function entry, and patches it dynamically at runtime to
+enable or disable the redirection. In the case of RISC-V, 2 instructions,
+AUIPC + JALR, are required to compose a function call. However, it is impossible
+to patch 2 instructions and expect that a concurrent read-side executes them
+without a race condition. This series makes atmoic code patching possible in
+RISC-V ftrace. Kernel preemption makes things even worse as it allows the old
+state to persist across the patching process with stop_machine().
+
+In order to get rid of stop_machine() and run dynamic ftrace with full kernel
+preemption, we partially initialize each patchable function entry at boot-time,
+setting the first instruction to AUIPC, and the second to NOP. Now, atmoic
+patching is possible because the kernel only has to update one instruction.
+According to Ziccif, as long as an instruction is naturally aligned, the ISA
+guarantee an atomic update.
+
+By fixing down the first instruction, AUIPC, the range of the ftrace trampoline
+is limited to +-2K from the predetermined target, ftrace_caller, due to the lack
+of immediate encoding space in RISC-V. To address the issue, we introduce
+CALL_OPS, where an 8B naturally align metadata is added in front of each
+pacthable function. The metadata is resolved at the first trampoline, then the
+execution can be derect to another custom trampoline.
+
+CMODX in the User Space
+-----------------------
+
+Though fence.i is an unprivileged instruction, the default Linux ABI prohibits
+the use of fence.i in userspace applications. At any point the scheduler may
+migrate a task onto a new hart. If migration occurs after the userspace
+synchronized the icache and instruction storage with fence.i, the icache on the
+new hart will no longer be clean. This is due to the behavior of fence.i only
+affecting the hart that it is called on. Thus, the hart that the task has been
+migrated to may not have synchronized instruction storage and icache.
+
+There are two ways to solve this problem: use the riscv_flush_icache() syscall,
+or use the ``PR_RISCV_SET_ICACHE_FLUSH_CTX`` prctl() and emit fence.i in
+userspace. The syscall performs a one-off icache flushing operation. The prctl
+changes the Linux ABI to allow userspace to emit icache flushing operations.
+
+As an aside, "deferred" icache flushes can sometimes be triggered in the kernel.
+At the time of writing, this only occurs during the riscv_flush_icache() syscall
+and when the kernel uses copy_to_user_page(). These deferred flushes happen only
+when the memory map being used by a hart changes. If the prctl() context caused
+an icache flush, this deferred icache flush will be skipped as it is redundant.
+Therefore, there will be no additional flush when using the riscv_flush_icache()
+syscall inside of the prctl() context.
+
+prctl() Interface
+---------------------
+
+Call prctl() with ``PR_RISCV_SET_ICACHE_FLUSH_CTX`` as the first argument. The
+remaining arguments will be delegated to the riscv_set_icache_flush_ctx
+function detailed below.
+
+.. kernel-doc:: arch/riscv/mm/cacheflush.c
+ :identifiers: riscv_set_icache_flush_ctx
+
+Example usage:
+
+The following files are meant to be compiled and linked with each other. The
+modify_instruction() function replaces an add with 0 with an add with one,
+causing the instruction sequence in get_value() to change from returning a zero
+to returning a one.
+
+cmodx.c::
+
+ #include <stdio.h>
+ #include <sys/prctl.h>
+
+ extern int get_value();
+ extern void modify_instruction();
+
+ int main()
+ {
+ int value = get_value();
+ printf("Value before cmodx: %d\n", value);
+
+ // Call prctl before first fence.i is called inside modify_instruction
+ prctl(PR_RISCV_SET_ICACHE_FLUSH_CTX, PR_RISCV_CTX_SW_FENCEI_ON, PR_RISCV_SCOPE_PER_PROCESS);
+ modify_instruction();
+ // Call prctl after final fence.i is called in process
+ prctl(PR_RISCV_SET_ICACHE_FLUSH_CTX, PR_RISCV_CTX_SW_FENCEI_OFF, PR_RISCV_SCOPE_PER_PROCESS);
+
+ value = get_value();
+ printf("Value after cmodx: %d\n", value);
+ return 0;
+ }
+
+cmodx.S::
+
+ .option norvc
+
+ .text
+ .global modify_instruction
+ modify_instruction:
+ lw a0, new_insn
+ lui a5,%hi(old_insn)
+ sw a0,%lo(old_insn)(a5)
+ fence.i
+ ret
+
+ .section modifiable, "awx"
+ .global get_value
+ get_value:
+ li a0, 0
+ old_insn:
+ addi a0, a0, 0
+ ret
+
+ .data
+ new_insn:
+ addi a0, a0, 1
diff --git a/Documentation/arch/riscv/hwprobe.rst b/Documentation/arch/riscv/hwprobe.rst
index b2bcc9eed9aa..06c5280b728a 100644
--- a/Documentation/arch/riscv/hwprobe.rst
+++ b/Documentation/arch/riscv/hwprobe.rst
@@ -51,7 +51,7 @@ The following keys are defined:
* :c:macro:`RISCV_HWPROBE_KEY_MARCHID`: Contains the value of ``marchid``, as
defined by the RISC-V privileged architecture specification.
-* :c:macro:`RISCV_HWPROBE_KEY_MIMPLID`: Contains the value of ``mimplid``, as
+* :c:macro:`RISCV_HWPROBE_KEY_MIMPID`: Contains the value of ``mimpid``, as
defined by the RISC-V privileged architecture specification.
* :c:macro:`RISCV_HWPROBE_KEY_BASE_BEHAVIOR`: A bitmask containing the base
@@ -183,30 +183,199 @@ The following keys are defined:
defined in the Atomic Compare-and-Swap (CAS) instructions manual starting
from commit 5059e0ca641c ("update to ratified").
+ * :c:macro:`RISCV_HWPROBE_EXT_ZICNTR`: The Zicntr extension version 2.0
+ is supported as defined in the RISC-V ISA manual.
+
* :c:macro:`RISCV_HWPROBE_EXT_ZICOND`: The Zicond extension is supported as
defined in the RISC-V Integer Conditional (Zicond) operations extension
manual starting from commit 95cf1f9 ("Add changes requested by Ved
during signoff")
-* :c:macro:`RISCV_HWPROBE_KEY_CPUPERF_0`: A bitmask that contains performance
- information about the selected set of processors.
+ * :c:macro:`RISCV_HWPROBE_EXT_ZIHINTPAUSE`: The Zihintpause extension is
+ supported as defined in the RISC-V ISA manual starting from commit
+ d8ab5c78c207 ("Zihintpause is ratified").
- * :c:macro:`RISCV_HWPROBE_MISALIGNED_UNKNOWN`: The performance of misaligned
- accesses is unknown.
+ * :c:macro:`RISCV_HWPROBE_EXT_ZIHPM`: The Zihpm extension version 2.0
+ is supported as defined in the RISC-V ISA manual.
- * :c:macro:`RISCV_HWPROBE_MISALIGNED_EMULATED`: Misaligned accesses are
- emulated via software, either in or below the kernel. These accesses are
- always extremely slow.
+ * :c:macro:`RISCV_HWPROBE_EXT_ZVE32X`: The Vector sub-extension Zve32x is
+ supported, as defined by version 1.0 of the RISC-V Vector extension manual.
- * :c:macro:`RISCV_HWPROBE_MISALIGNED_SLOW`: Misaligned accesses are slower
- than equivalent byte accesses. Misaligned accesses may be supported
- directly in hardware, or trapped and emulated by software.
+ * :c:macro:`RISCV_HWPROBE_EXT_ZVE32F`: The Vector sub-extension Zve32f is
+ supported, as defined by version 1.0 of the RISC-V Vector extension manual.
- * :c:macro:`RISCV_HWPROBE_MISALIGNED_FAST`: Misaligned accesses are faster
- than equivalent byte accesses.
+ * :c:macro:`RISCV_HWPROBE_EXT_ZVE64X`: The Vector sub-extension Zve64x is
+ supported, as defined by version 1.0 of the RISC-V Vector extension manual.
- * :c:macro:`RISCV_HWPROBE_MISALIGNED_UNSUPPORTED`: Misaligned accesses are
- not supported at all and will generate a misaligned address fault.
+ * :c:macro:`RISCV_HWPROBE_EXT_ZVE64F`: The Vector sub-extension Zve64f is
+ supported, as defined by version 1.0 of the RISC-V Vector extension manual.
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZVE64D`: The Vector sub-extension Zve64d is
+ supported, as defined by version 1.0 of the RISC-V Vector extension manual.
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZIMOP`: The Zimop May-Be-Operations extension is
+ supported as defined in the RISC-V ISA manual starting from commit
+ 58220614a5f ("Zimop is ratified/1.0").
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZCA`: The Zca extension part of Zc* standard
+ extensions for code size reduction, as ratified in commit 8be3419c1c0
+ ("Zcf doesn't exist on RV64 as it contains no instructions") of
+ riscv-code-size-reduction.
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZCB`: The Zcb extension part of Zc* standard
+ extensions for code size reduction, as ratified in commit 8be3419c1c0
+ ("Zcf doesn't exist on RV64 as it contains no instructions") of
+ riscv-code-size-reduction.
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZCD`: The Zcd extension part of Zc* standard
+ extensions for code size reduction, as ratified in commit 8be3419c1c0
+ ("Zcf doesn't exist on RV64 as it contains no instructions") of
+ riscv-code-size-reduction.
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZCF`: The Zcf extension part of Zc* standard
+ extensions for code size reduction, as ratified in commit 8be3419c1c0
+ ("Zcf doesn't exist on RV64 as it contains no instructions") of
+ riscv-code-size-reduction.
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZCMOP`: The Zcmop May-Be-Operations extension is
+ supported as defined in the RISC-V ISA manual starting from commit
+ c732a4f39a4 ("Zcmop is ratified/1.0").
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZAWRS`: The Zawrs extension is supported as
+ ratified in commit 98918c844281 ("Merge pull request #1217 from
+ riscv/zawrs") of riscv-isa-manual.
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZAAMO`: The Zaamo extension is supported as
+ defined in the in the RISC-V ISA manual starting from commit e87412e621f1
+ ("integrate Zaamo and Zalrsc text (#1304)").
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZALASR`: The Zalasr extension is supported as
+ frozen at commit 194f0094 ("Version 0.9 for freeze") of riscv-zalasr.
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZALRSC`: The Zalrsc extension is supported as
+ defined in the in the RISC-V ISA manual starting from commit e87412e621f1
+ ("integrate Zaamo and Zalrsc text (#1304)").
+
+ * :c:macro:`RISCV_HWPROBE_EXT_SUPM`: The Supm extension is supported as
+ defined in version 1.0 of the RISC-V Pointer Masking extensions.
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZFBFMIN`: The Zfbfmin extension is supported as
+ defined in the RISC-V ISA manual starting from commit 4dc23d6229de
+ ("Added Chapter title to BF16").
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZVFBFMIN`: The Zvfbfmin extension is supported as
+ defined in the RISC-V ISA manual starting from commit 4dc23d6229de
+ ("Added Chapter title to BF16").
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZVFBFWMA`: The Zvfbfwma extension is supported as
+ defined in the RISC-V ISA manual starting from commit 4dc23d6229de
+ ("Added Chapter title to BF16").
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZICBOM`: The Zicbom extension is supported, as
+ ratified in commit 3dd606f ("Create cmobase-v1.0.pdf") of riscv-CMOs.
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZABHA`: The Zabha extension is supported as
+ ratified in commit 49f49c842ff9 ("Update to Rafified state") of
+ riscv-zabha.
+
+ * :c:macro:`RISCV_HWPROBE_EXT_ZICBOP`: The Zicbop extension is supported, as
+ ratified in commit 3dd606f ("Create cmobase-v1.0.pdf") of riscv-CMOs.
+
+* :c:macro:`RISCV_HWPROBE_KEY_CPUPERF_0`: Deprecated. Returns similar values to
+ :c:macro:`RISCV_HWPROBE_KEY_MISALIGNED_SCALAR_PERF`, but the key was
+ mistakenly classified as a bitmask rather than a value.
+
+* :c:macro:`RISCV_HWPROBE_KEY_MISALIGNED_SCALAR_PERF`: An enum value describing
+ the performance of misaligned scalar native word accesses on the selected set
+ of processors.
+
+ * :c:macro:`RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN`: The performance of
+ misaligned scalar accesses is unknown.
+
+ * :c:macro:`RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED`: Misaligned scalar
+ accesses are emulated via software, either in or below the kernel. These
+ accesses are always extremely slow.
+
+ * :c:macro:`RISCV_HWPROBE_MISALIGNED_SCALAR_SLOW`: Misaligned scalar native
+ word sized accesses are slower than the equivalent quantity of byte
+ accesses. Misaligned accesses may be supported directly in hardware, or
+ trapped and emulated by software.
+
+ * :c:macro:`RISCV_HWPROBE_MISALIGNED_SCALAR_FAST`: Misaligned scalar native
+ word sized accesses are faster than the equivalent quantity of byte
+ accesses.
+
+ * :c:macro:`RISCV_HWPROBE_MISALIGNED_SCALAR_UNSUPPORTED`: Misaligned scalar
+ accesses are not supported at all and will generate a misaligned address
+ fault.
* :c:macro:`RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE`: An unsigned int which
represents the size of the Zicboz block in bytes.
+
+* :c:macro:`RISCV_HWPROBE_KEY_HIGHEST_VIRT_ADDRESS`: An unsigned long which
+ represent the highest userspace virtual address usable.
+
+* :c:macro:`RISCV_HWPROBE_KEY_TIME_CSR_FREQ`: Frequency (in Hz) of `time CSR`.
+
+* :c:macro:`RISCV_HWPROBE_KEY_MISALIGNED_VECTOR_PERF`: An enum value describing the
+ performance of misaligned vector accesses on the selected set of processors.
+
+ * :c:macro:`RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN`: The performance of misaligned
+ vector accesses is unknown.
+
+ * :c:macro:`RISCV_HWPROBE_MISALIGNED_VECTOR_SLOW`: 32-bit misaligned accesses using vector
+ registers are slower than the equivalent quantity of byte accesses via vector registers.
+ Misaligned accesses may be supported directly in hardware, or trapped and emulated by software.
+
+ * :c:macro:`RISCV_HWPROBE_MISALIGNED_VECTOR_FAST`: 32-bit misaligned accesses using vector
+ registers are faster than the equivalent quantity of byte accesses via vector registers.
+
+ * :c:macro:`RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED`: Misaligned vector accesses are
+ not supported at all and will generate a misaligned address fault.
+
+* :c:macro:`RISCV_HWPROBE_KEY_VENDOR_EXT_MIPS_0`: A bitmask containing the
+ mips vendor extensions that are compatible with the
+ :c:macro:`RISCV_HWPROBE_BASE_BEHAVIOR_IMA`: base system behavior.
+
+ * MIPS
+
+ * :c:macro:`RISCV_HWPROBE_VENDOR_EXT_XMIPSEXECTL`: The xmipsexectl vendor
+ extension is supported in the MIPS ISA extensions spec.
+
+* :c:macro:`RISCV_HWPROBE_KEY_VENDOR_EXT_THEAD_0`: A bitmask containing the
+ thead vendor extensions that are compatible with the
+ :c:macro:`RISCV_HWPROBE_BASE_BEHAVIOR_IMA`: base system behavior.
+
+ * T-HEAD
+
+ * :c:macro:`RISCV_HWPROBE_VENDOR_EXT_XTHEADVECTOR`: The xtheadvector vendor
+ extension is supported in the T-Head ISA extensions spec starting from
+ commit a18c801634 ("Add T-Head VECTOR vendor extension. ").
+
+* :c:macro:`RISCV_HWPROBE_KEY_ZICBOM_BLOCK_SIZE`: An unsigned int which
+ represents the size of the Zicbom block in bytes.
+
+* :c:macro:`RISCV_HWPROBE_KEY_VENDOR_EXT_SIFIVE_0`: A bitmask containing the
+ sifive vendor extensions that are compatible with the
+ :c:macro:`RISCV_HWPROBE_BASE_BEHAVIOR_IMA`: base system behavior.
+
+ * SIFIVE
+
+ * :c:macro:`RISCV_HWPROBE_VENDOR_EXT_XSFVQMACCDOD`: The Xsfqmaccdod vendor
+ extension is supported in version 1.1 of SiFive Int8 Matrix Multiplication
+ Extensions Specification.
+
+ * :c:macro:`RISCV_HWPROBE_VENDOR_EXT_XSFVQMACCQOQ`: The Xsfqmaccqoq vendor
+ extension is supported in version 1.1 of SiFive Int8 Matrix Multiplication
+ Instruction Extensions Specification.
+
+ * :c:macro:`RISCV_HWPROBE_VENDOR_EXT_XSFVFNRCLIPXFQF`: The Xsfvfnrclipxfqf
+ vendor extension is supported in version 1.0 of SiFive FP32-to-int8 Ranged
+ Clip Instructions Extensions Specification.
+
+ * :c:macro:`RISCV_HWPROBE_VENDOR_EXT_XSFVFWMACCQQQ`: The Xsfvfwmaccqqq
+ vendor extension is supported in version 1.0 of Matrix Multiply Accumulate
+ Instruction Extensions Specification.
+
+* :c:macro:`RISCV_HWPROBE_KEY_ZICBOP_BLOCK_SIZE`: An unsigned int which
+ represents the size of the Zicbop block in bytes.
diff --git a/Documentation/arch/riscv/index.rst b/Documentation/arch/riscv/index.rst
index 4dab0cb4b900..eecf347ce849 100644
--- a/Documentation/arch/riscv/index.rst
+++ b/Documentation/arch/riscv/index.rst
@@ -13,6 +13,7 @@ RISC-V architecture
patch-acceptance
uabi
vector
+ cmodx
features
diff --git a/Documentation/arch/riscv/uabi.rst b/Documentation/arch/riscv/uabi.rst
index 54d199dce78b..243e40062e34 100644
--- a/Documentation/arch/riscv/uabi.rst
+++ b/Documentation/arch/riscv/uabi.rst
@@ -65,4 +65,22 @@ the extension, or may have deliberately removed it from the listing.
Misaligned accesses
-------------------
-Misaligned accesses are supported in userspace, but they may perform poorly.
+Misaligned scalar accesses are supported in userspace, but they may perform
+poorly. Misaligned vector accesses are only supported if the Zicclsm extension
+is supported.
+
+Pointer masking
+---------------
+
+Support for pointer masking in userspace (the Supm extension) is provided via
+the ``PR_SET_TAGGED_ADDR_CTRL`` and ``PR_GET_TAGGED_ADDR_CTRL`` ``prctl()``
+operations. Pointer masking is disabled by default. To enable it, userspace
+must call ``PR_SET_TAGGED_ADDR_CTRL`` with the ``PR_PMLEN`` field set to the
+number of mask/tag bits needed by the application. ``PR_PMLEN`` is interpreted
+as a lower bound; if the kernel is unable to satisfy the request, the
+``PR_SET_TAGGED_ADDR_CTRL`` operation will fail. The actual number of tag bits
+is returned in ``PR_PMLEN`` by the ``PR_GET_TAGGED_ADDR_CTRL`` operation.
+
+Additionally, when pointer masking is enabled (``PR_PMLEN`` is greater than 0),
+a tagged address ABI is supported, with the same interface and behavior as
+documented for AArch64 (Documentation/arch/arm64/tagged-address-abi.rst).
diff --git a/Documentation/arch/riscv/vector.rst b/Documentation/arch/riscv/vector.rst
index 75dd88a62e1d..3987f5f76a9d 100644
--- a/Documentation/arch/riscv/vector.rst
+++ b/Documentation/arch/riscv/vector.rst
@@ -15,7 +15,7 @@ status for the use of Vector in userspace. The intended usage guideline for
these interfaces is to give init systems a way to modify the availability of V
for processes running under its domain. Calling these interfaces is not
recommended in libraries routines because libraries should not override policies
-configured from the parant process. Also, users must noted that these interfaces
+configured from the parent process. Also, users must note that these interfaces
are not portable to non-Linux, nor non-RISC-V environments, so it is discourage
to use in a portable code. To get the availability of V in an ELF program,
please read :c:macro:`COMPAT_HWCAP_ISA_V` bit of :c:macro:`ELF_HWCAP` in the
diff --git a/Documentation/arch/riscv/vm-layout.rst b/Documentation/arch/riscv/vm-layout.rst
index 69ff6da1dbf8..eabec99b5852 100644
--- a/Documentation/arch/riscv/vm-layout.rst
+++ b/Documentation/arch/riscv/vm-layout.rst
@@ -47,11 +47,12 @@ RISC-V Linux Kernel SV39
| Kernel-space virtual memory, shared between all processes:
____________________________________________________________|___________________________________________________________
| | | |
- ffffffc6fea00000 | -228 GB | ffffffc6feffffff | 6 MB | fixmap
- ffffffc6ff000000 | -228 GB | ffffffc6ffffffff | 16 MB | PCI io
- ffffffc700000000 | -228 GB | ffffffc7ffffffff | 4 GB | vmemmap
- ffffffc800000000 | -224 GB | ffffffd7ffffffff | 64 GB | vmalloc/ioremap space
- ffffffd800000000 | -160 GB | fffffff6ffffffff | 124 GB | direct mapping of all physical memory
+ ffffffc4fea00000 | -236 GB | ffffffc4feffffff | 6 MB | fixmap
+ ffffffc4ff000000 | -236 GB | ffffffc4ffffffff | 16 MB | PCI io
+ ffffffc500000000 | -236 GB | ffffffc5ffffffff | 4 GB | vmemmap
+ ffffffc600000000 | -232 GB | ffffffd5ffffffff | 64 GB | vmalloc/ioremap space
+ ffffffd600000000 | -168 GB | fffffff5ffffffff | 128 GB | direct mapping of all physical memory
+ | | | |
fffffff700000000 | -36 GB | fffffffeffffffff | 32 GB | kasan
__________________|____________|__________________|_________|____________________________________________________________
|
@@ -133,25 +134,3 @@ RISC-V Linux Kernel SV57
ffffffff00000000 | -4 GB | ffffffff7fffffff | 2 GB | modules, BPF
ffffffff80000000 | -2 GB | ffffffffffffffff | 2 GB | kernel
__________________|____________|__________________|_________|____________________________________________________________
-
-
-Userspace VAs
---------------------
-To maintain compatibility with software that relies on the VA space with a
-maximum of 48 bits the kernel will, by default, return virtual addresses to
-userspace from a 48-bit range (sv48). This default behavior is achieved by
-passing 0 into the hint address parameter of mmap. On CPUs with an address space
-smaller than sv48, the CPU maximum supported address space will be the default.
-
-Software can "opt-in" to receiving VAs from another VA space by providing
-a hint address to mmap. A hint address passed to mmap will cause the largest
-address space that fits entirely into the hint to be used, unless there is no
-space left in the address space. If there is no space available in the requested
-address space, an address in the next smallest available address space will be
-returned.
-
-For example, in order to obtain 48-bit VA space, a hint address greater than
-:code:`1 << 47` must be provided. Note that this is 47 due to sv48 userspace
-ending at :code:`1 << 47` and the addresses beyond this are reserved for the
-kernel. Similarly, to obtain 57-bit VA space addresses, a hint address greater
-than or equal to :code:`1 << 56` must be provided.
diff --git a/Documentation/arch/s390/driver-model.rst b/Documentation/arch/s390/driver-model.rst
index ad4bc2dbea43..e7488f02bb78 100644
--- a/Documentation/arch/s390/driver-model.rst
+++ b/Documentation/arch/s390/driver-model.rst
@@ -244,7 +244,7 @@ information about the interrupt from the irb parameter.
--------------------
The ccwgroup mechanism is designed to handle devices consisting of multiple ccw
-devices, like lcs or ctc.
+devices, like qeth or ctc.
The ccw driver provides a 'group' attribute. Piping bus ids of ccw devices to
this attributes creates a ccwgroup device consisting of these ccw devices (if
@@ -305,24 +305,3 @@ xpram shows up under devices/system/ as 'xpram'.
For each cpu, a directory is created under devices/system/cpu/. Each cpu has an
attribute 'online' which can be 0 or 1.
-
-
-4. Other devices
-----------------
-
-4.1 Netiucv
------------
-
-The netiucv driver creates an attribute 'connection' under
-bus/iucv/drivers/netiucv. Piping to this attribute creates a new netiucv
-connection to the specified host.
-
-Netiucv connections show up under devices/iucv/ as "netiucv<ifnum>". The interface
-number is assigned sequentially to the connections defined via the 'connection'
-attribute.
-
-user
- - shows the connection partner.
-
-buffer
- - maximum buffer size. Pipe to it to change buffer size.
diff --git a/Documentation/arch/s390/index.rst b/Documentation/arch/s390/index.rst
index 73c79bf586fd..e75a6e5d2505 100644
--- a/Documentation/arch/s390/index.rst
+++ b/Documentation/arch/s390/index.rst
@@ -8,6 +8,7 @@ s390 Architecture
cds
3270
driver-model
+ mm
monreader
qeth
s390dbf
diff --git a/Documentation/arch/s390/mm.rst b/Documentation/arch/s390/mm.rst
new file mode 100644
index 000000000000..084adad5eef9
--- /dev/null
+++ b/Documentation/arch/s390/mm.rst
@@ -0,0 +1,111 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=================
+Memory Management
+=================
+
+Virtual memory layout
+=====================
+
+.. note::
+
+ - Some aspects of the virtual memory layout setup are not
+ clarified (number of page levels, alignment, DMA memory).
+
+ - Unused gaps in the virtual memory layout could be present
+ or not - depending on how partucular system is configured.
+ No page tables are created for the unused gaps.
+
+ - The virtual memory regions are tracked or untracked by KASAN
+ instrumentation, as well as the KASAN shadow memory itself is
+ created only when CONFIG_KASAN configuration option is enabled.
+
+::
+
+ =============================================================================
+ | Physical | Virtual | VM area description
+ =============================================================================
+ +- 0 --------------+- 0 --------------+
+ | | S390_lowcore | Low-address memory
+ | +- 8 KB -----------+
+ | | |
+ | | |
+ | | ... unused gap | KASAN untracked
+ | | |
+ +- AMODE31_START --+- AMODE31_START --+ .amode31 rand. phys/virt start
+ |.amode31 text/data|.amode31 text/data| KASAN untracked
+ +- AMODE31_END ----+- AMODE31_END ----+ .amode31 rand. phys/virt end (<2GB)
+ | | |
+ | | |
+ +- __kaslr_offset_phys | kernel rand. phys start
+ | | |
+ | kernel text/data | |
+ | | |
+ +------------------+ | kernel phys end
+ | | |
+ | | |
+ | | |
+ | | |
+ +- ident_map_size -+ |
+ | |
+ | ... unused gap | KASAN untracked
+ | |
+ +- __identity_base + identity mapping start (>= 2GB)
+ | |
+ | identity | phys == virt - __identity_base
+ | mapping | virt == phys + __identity_base
+ | |
+ | | KASAN tracked
+ | |
+ | |
+ | |
+ | |
+ | |
+ | |
+ | |
+ | |
+ | |
+ | |
+ | |
+ | |
+ | |
+ | |
+ | |
+ +---- vmemmap -----+ 'struct page' array start
+ | |
+ | virtually mapped |
+ | memory map | KASAN untracked
+ | |
+ +- __abs_lowcore --+
+ | |
+ | Absolute Lowcore | KASAN untracked
+ | |
+ +- __memcpy_real_area
+ | |
+ | Real Memory Copy| KASAN untracked
+ | |
+ +- VMALLOC_START --+ vmalloc area start
+ | | KASAN untracked or
+ | vmalloc area | KASAN shallowly populated in case
+ | | CONFIG_KASAN_VMALLOC=y
+ +- MODULES_VADDR --+ modules area start
+ | | KASAN allocated per module or
+ | modules area | KASAN shallowly populated in case
+ | | CONFIG_KASAN_VMALLOC=y
+ +- __kaslr_offset -+ kernel rand. virt start
+ | | KASAN tracked
+ | kernel text/data | phys == (kvirt - __kaslr_offset) +
+ | | __kaslr_offset_phys
+ +- kernel .bss end + kernel rand. virt end
+ | |
+ | ... unused gap | KASAN untracked
+ | |
+ +------------------+ UltraVisor Secure Storage limit
+ | |
+ | ... unused gap | KASAN untracked
+ | |
+ +KASAN_SHADOW_START+ KASAN shadow memory start
+ | |
+ | KASAN shadow | KASAN untracked
+ | |
+ +------------------+ ASCE limit
diff --git a/Documentation/arch/s390/s390dbf.rst b/Documentation/arch/s390/s390dbf.rst
index af8bdc3629e7..aad6d88974fe 100644
--- a/Documentation/arch/s390/s390dbf.rst
+++ b/Documentation/arch/s390/s390dbf.rst
@@ -243,9 +243,8 @@ Examples:
Changing the size of debug areas
------------------------------------
-It is possible the change the size of debug areas through piping
-the number of pages to the debugfs file "pages". The resize request will
-also flush the debug areas.
+To resize a debug area, write the desired page count to the "pages" file.
+Existing data is preserved if it fits; otherwise, oldest entries are dropped.
Example:
diff --git a/Documentation/arch/s390/vfio-ap.rst b/Documentation/arch/s390/vfio-ap.rst
index 929ee1c1c940..eba1991fbdba 100644
--- a/Documentation/arch/s390/vfio-ap.rst
+++ b/Documentation/arch/s390/vfio-ap.rst
@@ -380,6 +380,36 @@ matrix device.
control_domains:
A read-only file for displaying the control domain numbers assigned to the
vfio_ap mediated device.
+ ap_config:
+ A read/write file that, when written to, allows all three of the
+ vfio_ap mediated device's ap matrix masks to be replaced in one shot.
+ Three masks are given, one for adapters, one for domains, and one for
+ control domains. If the given state cannot be set then no changes are
+ made to the vfio-ap mediated device.
+
+ The format of the data written to ap_config is as follows:
+ {amask},{dmask},{cmask}\n
+
+ \n is a newline character.
+
+ amask, dmask, and cmask are masks identifying which adapters, domains,
+ and control domains should be assigned to the mediated device.
+
+ The format of a mask is as follows:
+ 0xNN..NN
+
+ Where NN..NN is 64 hexadecimal characters representing a 256-bit value.
+ The leftmost (highest order) bit represents adapter/domain 0.
+
+ For an example set of masks that represent your mdev's current
+ configuration, simply cat ap_config.
+
+ Setting an adapter or domain number greater than the maximum allowed for
+ the system will result in an error.
+
+ This attribute is intended to be used by automation. End users would be
+ better served using the respective assign/unassign attributes for
+ adapters, domains, and control domains.
* functions:
@@ -550,7 +580,7 @@ These are the steps:
following Kconfig elements selected:
* IOMMU_SUPPORT
* S390
- * ZCRYPT
+ * AP
* VFIO
* KVM
@@ -969,6 +999,36 @@ the vfio_ap mediated device to which it is assigned as long as each new APQN
resulting from plugging it in references a queue device bound to the vfio_ap
device driver.
+Driver Features
+===============
+The vfio_ap driver exposes a sysfs file containing supported features.
+This exists so third party tools (like Libvirt and mdevctl) can query the
+availability of specific features.
+
+The features list can be found here: /sys/bus/matrix/devices/matrix/features
+
+Entries are space delimited. Each entry consists of a combination of
+alphanumeric and underscore characters.
+
+Example:
+cat /sys/bus/matrix/devices/matrix/features
+guest_matrix dyn ap_config
+
+the following features are advertised:
+
+---------------+---------------------------------------------------------------+
+| Flag | Description |
++==============+===============================================================+
+| guest_matrix | guest_matrix attribute exists. It reports the matrix of |
+| | adapters and domains that are or will be passed through to a |
+| | guest when the mdev is attached to it. |
++--------------+---------------------------------------------------------------+
+| dyn | Indicates hot plug/unplug of AP adapters, domains and control |
+| | domains for a guest to which the mdev is attached. |
++------------+-----------------------------------------------------------------+
+| ap_config | ap_config interface for one-shot modifications to mdev config |
++--------------+---------------------------------------------------------------+
+
Limitations
===========
Live guest migration is not supported for guests using AP devices without
diff --git a/Documentation/arch/sparc/oradax/dax-hv-api.txt b/Documentation/arch/sparc/oradax/dax-hv-api.txt
index 7ecd0bf4957b..ef1a4c2bf08b 100644
--- a/Documentation/arch/sparc/oradax/dax-hv-api.txt
+++ b/Documentation/arch/sparc/oradax/dax-hv-api.txt
@@ -41,7 +41,7 @@ Chapter 36. Coprocessor services
submissions until they succeed; waiting for an outstanding CCB to complete is not necessary, and would
not be a guarantee that a future submission would succeed.
- The availablility of DAX coprocessor command service is indicated by the presence of the DAX virtual
+ The availability of DAX coprocessor command service is indicated by the presence of the DAX virtual
device node in the guest MD (Section 8.24.17, “Database Analytics Accelerators (DAX) virtual-device
node”).
diff --git a/Documentation/arch/x86/amd-debugging.rst b/Documentation/arch/x86/amd-debugging.rst
new file mode 100644
index 000000000000..d92bf59d62c7
--- /dev/null
+++ b/Documentation/arch/x86/amd-debugging.rst
@@ -0,0 +1,368 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Debugging AMD Zen systems
++++++++++++++++++++++++++
+
+Introduction
+============
+
+This document describes techniques that are useful for debugging issues with
+AMD Zen systems. It is intended for use by developers and technical users
+to help identify and resolve issues.
+
+S3 vs s2idle
+============
+
+On AMD systems, it's not possible to simultaneously support suspend-to-RAM (S3)
+and suspend-to-idle (s2idle). To confirm which mode your system supports you
+can look at ``cat /sys/power/mem_sleep``. If it shows ``s2idle [deep]`` then
+*S3* is supported. If it shows ``[s2idle]`` then *s2idle* is
+supported.
+
+On systems that support *S3*, the firmware will be utilized to put all hardware into
+the appropriate low power state.
+
+On systems that support *s2idle*, the kernel will be responsible for transitioning devices
+into the appropriate low power state. When all devices are in the appropriate low
+power state, the hardware will transition into a hardware sleep state.
+
+After a suspend cycle you can tell how much time was spent in a hardware sleep
+state by looking at ``cat /sys/power/suspend_stats/last_hw_sleep``.
+
+This flowchart explains how the AMD s2idle suspend flow works.
+
+.. kernel-figure:: suspend.svg
+
+This flowchart explains how the amd s2idle resume flow works.
+
+.. kernel-figure:: resume.svg
+
+s2idle debugging tool
+=====================
+
+As there are a lot of places that problems can occur, a debugging tool has been
+created at
+`amd-debug-tools <https://git.kernel.org/pub/scm/linux/kernel/git/superm1/amd-debug-tools.git/about/>`_
+that can help test for common problems and offer suggestions.
+
+If you have an s2idle issue, it's best to start with this and follow instructions
+from its findings. If you continue to have an issue, raise a bug with the
+report generated from this script to
+`drm/amd gitlab <https://gitlab.freedesktop.org/drm/amd/-/issues/new?issuable_template=s2idle_BUG_TEMPLATE>`_.
+
+Spurious s2idle wakeups from an IRQ
+===================================
+
+Spurious wakeups will generally have an IRQ set to ``/sys/power/pm_wakeup_irq``.
+This can be matched to ``/proc/interrupts`` to determine what device woke the system.
+
+If this isn't enough to debug the problem, then the following sysfs files
+can be set to add more verbosity to the wakeup process: ::
+
+ # echo 1 | sudo tee /sys/power/pm_debug_messages
+ # echo 1 | sudo tee /sys/power/pm_print_times
+
+After making those changes, the kernel will display messages that can
+be traced back to kernel s2idle loop code as well as display any active
+GPIO sources while waking up.
+
+If the wakeup is caused by the ACPI SCI, additional ACPI debugging may be
+needed. These commands can enable additional trace data: ::
+
+ # echo enable | sudo tee /sys/module/acpi/parameters/trace_state
+ # echo 1 | sudo tee /sys/module/acpi/parameters/aml_debug_output
+ # echo 0x0800000f | sudo tee /sys/module/acpi/parameters/debug_level
+ # echo 0xffff0000 | sudo tee /sys/module/acpi/parameters/debug_layer
+
+Spurious s2idle wakeups from a GPIO
+===================================
+
+If a GPIO is active when waking up the system ideally you would look at the
+schematic to determine what device it is associated with. If the schematic
+is not available, another tactic is to look at the ACPI _EVT() entry
+to determine what device is notified when that GPIO is active.
+
+For a hypothetical example, say that GPIO 59 woke up the system. You can
+look at the SSDT to determine what device is notified when GPIO 59 is active.
+
+First convert the GPIO number into hex. ::
+
+ $ python3 -c "print(hex(59))"
+ 0x3b
+
+Next determine which ACPI table has the ``_EVT`` entry. For example: ::
+
+ $ sudo grep EVT /sys/firmware/acpi/tables/SSDT*
+ grep: /sys/firmware/acpi/tables/SSDT27: binary file matches
+
+Decode this table::
+
+ $ sudo cp /sys/firmware/acpi/tables/SSDT27 .
+ $ sudo iasl -d SSDT27
+
+Then look at the table and find the matching entry for GPIO 0x3b. ::
+
+ Case (0x3B)
+ {
+ M000 (0x393B)
+ M460 (" Notify (\\_SB.PCI0.GP17.XHC1, 0x02)\n", Zero, Zero, Zero, Zero, Zero, Zero)
+ Notify (\_SB.PCI0.GP17.XHC1, 0x02) // Device Wake
+ }
+
+You can see in this case that the device ``\_SB.PCI0.GP17.XHC1`` is notified
+when GPIO 59 is active. It's obvious this is an XHCI controller, but to go a
+step further you can figure out which XHCI controller it is by matching it to
+ACPI.::
+
+ $ grep "PCI0.GP17.XHC1" /sys/bus/acpi/devices/*/path
+ /sys/bus/acpi/devices/device:2d/path:\_SB_.PCI0.GP17.XHC1
+ /sys/bus/acpi/devices/device:2e/path:\_SB_.PCI0.GP17.XHC1.RHUB
+ /sys/bus/acpi/devices/device:2f/path:\_SB_.PCI0.GP17.XHC1.RHUB.PRT1
+ /sys/bus/acpi/devices/device:30/path:\_SB_.PCI0.GP17.XHC1.RHUB.PRT1.CAM0
+ /sys/bus/acpi/devices/device:31/path:\_SB_.PCI0.GP17.XHC1.RHUB.PRT1.CAM1
+ /sys/bus/acpi/devices/device:32/path:\_SB_.PCI0.GP17.XHC1.RHUB.PRT2
+ /sys/bus/acpi/devices/LNXPOWER:0d/path:\_SB_.PCI0.GP17.XHC1.PWRS
+
+Here you can see it matches to ``device:2d``. Look at the ``physical_node``
+to determine what PCI device that actually is. ::
+
+ $ ls -l /sys/bus/acpi/devices/device:2d/physical_node
+ lrwxrwxrwx 1 root root 0 Feb 12 13:22 /sys/bus/acpi/devices/device:2d/physical_node -> ../../../../../pci0000:00/0000:00:08.1/0000:c2:00.4
+
+So there you have it: the PCI device associated with this GPIO wakeup was ``0000:c2:00.4``.
+
+The ``amd_s2idle.py`` script will capture most of these artifacts for you.
+
+s2idle PM debug messages
+========================
+
+During the s2idle flow on AMD systems, the ACPI LPS0 driver is responsible
+to check all uPEP constraints. Failing uPEP constraints does not prevent
+s0i3 entry. This means that if some constraints are not met, it is possible
+the kernel may attempt to enter s2idle even if there are some known issues.
+
+To activate PM debugging, either specify ``pm_debug_messagess`` kernel
+command-line option at boot or write to ``/sys/power/pm_debug_messages``.
+Unmet constraints will be displayed in the kernel log and can be
+viewed by logging tools that process kernel ring buffer like ``dmesg`` or
+``journalctl``."
+
+If the system freezes on entry/exit before these messages are flushed, a
+useful debugging tactic is to unbind the ``amd_pmc`` driver to prevent
+notification to the platform to start s0i3 entry. This will stop the
+system from freezing on entry or exit and let you view all the failed
+constraints. ::
+
+ cd /sys/bus/platform/drivers/amd_pmc
+ ls | grep AMD | sudo tee unbind
+
+After doing this, run the suspend cycle and look specifically for errors around: ::
+
+ ACPI: LPI: Constraint not met; min power state:%s current power state:%s
+
+Historical examples of s2idle issues
+====================================
+
+To help understand the types of issues that can occur and how to debug them,
+here are some historical examples of s2idle issues that have been resolved.
+
+Core offlining
+--------------
+An end user had reported that taking a core offline would prevent the system
+from properly entering s0i3. This was debugged using internal AMD tools
+to capture and display a stream of metrics from the hardware showing what changed
+when a core was offlined. It was determined that the hardware didn't get
+notification the offline cores were in the deepest state, and so it prevented
+CPU from going into the deepest state. The issue was debugged to a missing
+command to put cores into C3 upon offline.
+
+`commit d6b88ce2eb9d2 ("ACPI: processor idle: Allow playing dead in C3 state") <https://git.kernel.org/torvalds/c/d6b88ce2eb9d2>`_
+
+Corruption after resume
+-----------------------
+A big problem that occurred with Rembrandt was that there was graphical
+corruption after resume. This happened because of a misalignment of PSP
+and driver responsibility. The PSP will save and restore DMCUB, but the
+driver assumed it needed to reset DMCUB on resume.
+This actually was a misalignment for earlier silicon as well, but was not
+observed.
+
+`commit 79d6b9351f086 ("drm/amd/display: Don't reinitialize DMCUB on s0ix resume") <https://git.kernel.org/torvalds/c/79d6b9351f086>`_
+
+Back to Back suspends fail
+--------------------------
+When using a wakeup source that triggers the IRQ to wakeup, a bug in the
+pinctrl-amd driver may capture the wrong state of the IRQ and prevent the
+system going back to sleep properly.
+
+`commit b8c824a869f22 ("pinctrl: amd: Don't save/restore interrupt status and wake status bits") <https://git.kernel.org/torvalds/c/b8c824a869f22>`_
+
+Spurious timer based wakeup after 5 minutes
+-------------------------------------------
+The HPET was being used to program the wakeup source for the system, however
+this was causing a spurious wakeup after 5 minutes. The correct alarm to use
+was the ACPI alarm.
+
+`commit 3d762e21d5637 ("rtc: cmos: Use ACPI alarm for non-Intel x86 systems too") <https://git.kernel.org/torvalds/c/3d762e21d5637>`_
+
+Disk disappears after resume
+----------------------------
+After resuming from s2idle, the NVME disk would disappear. This was due to the
+BIOS not specifying the _DSD StorageD3Enable property. This caused the NVME
+driver not to put the disk into the expected state at suspend and to fail
+on resume.
+
+`commit e79a10652bbd3 ("ACPI: x86: Force StorageD3Enable on more products") <https://git.kernel.org/torvalds/c/e79a10652bbd3>`_
+
+Spurious IRQ1
+-------------
+A number of Renoir, Lucienne, Cezanne, & Barcelo platforms have a
+platform firmware bug where IRQ1 is triggered during s0i3 resume.
+
+This was fixed in the platform firmware, but a number of systems didn't
+receive any more platform firmware updates.
+
+`commit 8e60615e89321 ("platform/x86/amd: pmc: Disable IRQ1 wakeup for RN/CZN") <https://git.kernel.org/torvalds/c/8e60615e89321>`_
+
+Hardware timeout
+----------------
+The hardware performs many actions besides accepting the values from
+amd-pmc driver. As the communication path with the hardware is a mailbox,
+it's possible that it might not respond quickly enough.
+This issue manifested as a failure to suspend: ::
+
+ PM: dpm_run_callback(): acpi_subsys_suspend_noirq+0x0/0x50 returns -110
+ amd_pmc AMDI0005:00: PM: failed to suspend noirq: error -110
+
+The timing problem was identified by comparing the values of the idle mask.
+
+`commit 3c3c8e88c8712 ("platform/x86: amd-pmc: Increase the response register timeout") <https://git.kernel.org/torvalds/c/3c3c8e88c8712>`_
+
+Failed to reach hardware sleep state with panel on
+--------------------------------------------------
+On some Strix systems certain panels were observed to block the system from
+entering a hardware sleep state if the internal panel was on during the sequence.
+
+Even though the panel got turned off during suspend it exposed a timing problem
+where an interrupt caused the display hardware to wake up and block low power
+state entry.
+
+`commit 40b8c14936bd2 ("drm/amd/display: Disable unneeded hpd interrupts during dm_init") <https://git.kernel.org/torvalds/c/40b8c14936bd2>`_
+
+Runtime power consumption issues
+================================
+
+Runtime power consumption is influenced by many factors, including but not
+limited to the configuration of the PCIe Active State Power Management (ASPM),
+the display brightness, the EPP policy of the CPU, and the power management
+of the devices.
+
+ASPM
+----
+For the best runtime power consumption, ASPM should be programmed as intended
+by the BIOS from the hardware vendor. To accomplish this the Linux kernel
+should be compiled with ``CONFIG_PCIEASPM_DEFAULT`` set to ``y`` and the
+sysfs file ``/sys/module/pcie_aspm/parameters/policy`` should not be modified.
+
+Most notably, if L1.2 is not configured properly for any devices, the SoC
+will not be able to enter the deepest idle state.
+
+EPP Policy
+----------
+The ``energy_performance_preference`` sysfs file can be used to set a bias
+of efficiency or performance for a CPU. This has a direct relationship on
+the battery life when more heavily biased towards performance.
+
+
+BIOS debug messages
+===================
+
+Most OEM machines don't have a serial UART for outputting kernel or BIOS
+debug messages. However BIOS debug messages are useful for understanding
+both BIOS bugs and bugs with the Linux kernel drivers that call BIOS AML.
+
+As the BIOS on most OEM AMD systems are based off an AMD reference BIOS,
+the infrastructure used for exporting debugging messages is often the same
+as AMD reference BIOS.
+
+Manually Parsing
+----------------
+There is generally an ACPI method ``\M460`` that different paths of the AML
+will call to emit a message to the BIOS serial log. This method takes
+7 arguments, with the first being a string and the rest being optional
+integers::
+
+ Method (M460, 7, Serialized)
+
+Here is an example of a string that BIOS AML may call out using ``\M460``::
+
+ M460 (" OEM-ASL-PCIe Address (0x%X)._REG (%d %d) PCSA = %d\n", DADR, Arg0, Arg1, PCSA, Zero, Zero)
+
+Normally when executed, the ``\M460`` method would populate the additional
+arguments into the string. In order to get these messages from the Linux
+kernel a hook has been added into ACPICA that can capture the *arguments*
+sent to ``\M460`` and print them to the kernel ring buffer.
+For example the following message could be emitted into kernel ring buffer::
+
+ extrace-0174 ex_trace_args : " OEM-ASL-PCIe Address (0x%X)._REG (%d %d) PCSA = %d\n", ec106000, 2, 1, 1, 0, 0
+
+In order to get these messages, you need to compile with ``CONFIG_ACPI_DEBUG``
+and then turn on the following ACPICA tracing parameters.
+This can be done either on the kernel command line or at runtime:
+
+* ``acpi.trace_method_name=\M460``
+* ``acpi.trace_state=method``
+
+NOTE: These can be very noisy at bootup. If you turn these parameters on
+the kernel command, please also consider turning up ``CONFIG_LOG_BUF_SHIFT``
+to a larger size such as 17 to avoid losing early boot messages.
+
+Tool assisted Parsing
+---------------------
+As mentioned above, parsing by hand can be tedious, especially with a lot of
+messages. To help with this, a tool has been created at
+`amd-debug-tools <https://git.kernel.org/pub/scm/linux/kernel/git/superm1/amd-debug-tools.git/about/>`_
+to help parse the messages.
+
+Random reboot issues
+====================
+
+When a random reboot occurs, the high-level reason for the reboot is stored
+in a register that will persist onto the next boot.
+
+There are 6 classes of reasons for the reboot:
+ * Software induced
+ * Power state transition
+ * Pin induced
+ * Hardware induced
+ * Remote reset
+ * Internal CPU event
+
+.. csv-table::
+ :header: "Bit", "Type", "Reason"
+ :align: left
+
+ "0", "Pin", "thermal pin BP_THERMTRIP_L was tripped"
+ "1", "Pin", "power button was pressed for 4 seconds"
+ "2", "Pin", "shutdown pin was tripped"
+ "4", "Remote", "remote ASF power off command was received"
+ "9", "Internal", "internal CPU thermal limit was tripped"
+ "16", "Pin", "system reset pin BP_SYS_RST_L was tripped"
+ "17", "Software", "software issued PCI reset"
+ "18", "Software", "software wrote 0x4 to reset control register 0xCF9"
+ "19", "Software", "software wrote 0x6 to reset control register 0xCF9"
+ "20", "Software", "software wrote 0xE to reset control register 0xCF9"
+ "21", "ACPI-state", "ACPI power state transition occurred"
+ "22", "Pin", "keyboard reset pin KB_RST_L was tripped"
+ "23", "Internal", "internal CPU shutdown event occurred"
+ "24", "Hardware", "system failed to boot before failed boot timer expired"
+ "25", "Hardware", "hardware watchdog timer expired"
+ "26", "Remote", "remote ASF reset command was received"
+ "27", "Internal", "an uncorrected error caused a data fabric sync flood event"
+ "29", "Internal", "FCH and MP1 failed warm reset handshake"
+ "30", "Internal", "a parity error occurred"
+ "31", "Internal", "a software sync flood event occurred"
+
+This information is read by the kernel at bootup and printed into
+the syslog. When a random reboot occurs this message can be helpful
+to determine the next component to debug.
diff --git a/Documentation/arch/x86/amd-hfi.rst b/Documentation/arch/x86/amd-hfi.rst
new file mode 100644
index 000000000000..bf3d3a1985a2
--- /dev/null
+++ b/Documentation/arch/x86/amd-hfi.rst
@@ -0,0 +1,133 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+======================================================================
+Hardware Feedback Interface For Hetero Core Scheduling On AMD Platform
+======================================================================
+
+:Copyright: 2025 Advanced Micro Devices, Inc. All Rights Reserved.
+
+:Author: Perry Yuan <perry.yuan@amd.com>
+:Author: Mario Limonciello <mario.limonciello@amd.com>
+
+Overview
+--------
+
+AMD Heterogeneous Core implementations are comprised of more than one
+architectural class and CPUs are comprised of cores of various efficiency and
+power capabilities: performance-oriented *classic cores* and power-efficient
+*dense cores*. As such, power management strategies must be designed to
+accommodate the complexities introduced by incorporating different core types.
+Heterogeneous systems can also extend to more than two architectural classes
+as well. The purpose of the scheduling feedback mechanism is to provide
+information to the operating system scheduler in real time such that the
+scheduler can direct threads to the optimal core.
+
+The goal of AMD's heterogeneous architecture is to attain power benefit by
+sending background threads to the dense cores while sending high priority
+threads to the classic cores. From a performance perspective, sending
+background threads to dense cores can free up power headroom and allow the
+classic cores to optimally service demanding threads. Furthermore, the area
+optimized nature of the dense cores allows for an increasing number of
+physical cores. This improved core density will have positive multithreaded
+performance impact.
+
+AMD Heterogeneous Core Driver
+-----------------------------
+
+The ``amd_hfi`` driver delivers the operating system a performance and energy
+efficiency capability data for each CPU in the system. The scheduler can use
+the ranking data from the HFI driver to make task placement decisions.
+
+Thread Classification and Ranking Table Interaction
+----------------------------------------------------
+
+The thread classification is used to select into a ranking table that
+describes an efficiency and performance ranking for each classification.
+
+Threads are classified during runtime into enumerated classes. The classes
+represent thread performance/power characteristics that may benefit from
+special scheduling behaviors. The below table depicts an example of thread
+classification and a preference where a given thread should be scheduled
+based on its thread class. The real time thread classification is consumed
+by the operating system and is used to inform the scheduler of where the
+thread should be placed.
+
+Thread Classification Example Table
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
++----------+----------------+-------------------------------+---------------------+---------+
+| class ID | Classification | Preferred scheduling behavior | Preemption priority | Counter |
++----------+----------------+-------------------------------+---------------------+---------+
+| 0 | Default | Performant | Highest | |
++----------+----------------+-------------------------------+---------------------+---------+
+| 1 | Non-scalable | Efficient | Lowest | PMCx1A1 |
++----------+----------------+-------------------------------+---------------------+---------+
+| 2 | I/O bound | Efficient | Lowest | PMCx044 |
++----------+----------------+-------------------------------+---------------------+---------+
+
+Thread classification is performed by the hardware each time that the thread is switched out.
+Threads that don't meet any hardware specified criteria are classified as "default".
+
+AMD Hardware Feedback Interface
+--------------------------------
+
+The Hardware Feedback Interface provides to the operating system information
+about the performance and energy efficiency of each CPU in the system. Each
+capability is given as a unit-less quantity in the range [0-255]. A higher
+performance value indicates higher performance capability, and a higher
+efficiency value indicates more efficiency. Energy efficiency and performance
+are reported in separate capabilities in the shared memory based ranking table.
+
+These capabilities may change at runtime as a result of changes in the
+operating conditions of the system or the action of external factors.
+Power Management firmware is responsible for detecting events that require
+a reordering of the performance and efficiency ranking. Table updates happen
+relatively infrequently and occur on the time scale of seconds or more.
+
+The following events trigger a table update:
+ * Thermal Stress Events
+ * Silent Compute
+ * Extreme Low Battery Scenarios
+
+The kernel or a userspace policy daemon can use these capabilities to modify
+task placement decisions. For instance, if either the performance or energy
+capabilities of a given logical processor becomes zero, it is an indication
+that the hardware recommends to the operating system to not schedule any tasks
+on that processor for performance or energy efficiency reasons, respectively.
+
+Implementation details for Linux
+--------------------------------
+
+The implementation of threads scheduling consists of the following steps:
+
+1. A thread is spawned and scheduled to the ideal core using the default
+ heterogeneous scheduling policy.
+2. The processor profiles thread execution and assigns an enumerated
+ classification ID.
+ This classification is communicated to the OS via logical processor
+ scope MSR.
+3. During the thread context switch out the operating system consumes the
+ workload (WL) classification which resides in a logical processor scope MSR.
+4. The OS triggers the hardware to clear its history by writing to an MSR,
+ after consuming the WL classification and before switching in the new thread.
+5. If due to the classification, ranking table, and processor availability,
+ the thread is not on its ideal processor, the OS will then consider
+ scheduling the thread on its ideal processor (if available).
+
+Ranking Table
+-------------
+The ranking table is a shared memory region that is used to communicate the
+performance and energy efficiency capabilities of each CPU in the system.
+
+The ranking table design includes rankings for each APIC ID in the system and
+rankings both for performance and efficiency for each workload classification.
+
+.. kernel-doc:: drivers/platform/x86/amd/hfi/hfi.c
+ :doc: amd_shmem_info
+
+Ranking Table update
+---------------------------
+The power management firmware issues an platform interrupt after updating the
+ranking table and is ready for the operating system to consume it. CPUs receive
+such interrupt and read new ranking table from shared memory which PCCT table
+has provided, then ``amd_hfi`` driver parses the new table to provide new
+consume data for scheduling decisions.
diff --git a/Documentation/arch/x86/amd-memory-encryption.rst b/Documentation/arch/x86/amd-memory-encryption.rst
index 07caa8fff852..bd840df708ea 100644
--- a/Documentation/arch/x86/amd-memory-encryption.rst
+++ b/Documentation/arch/x86/amd-memory-encryption.rst
@@ -87,14 +87,14 @@ The state of SME in the Linux kernel can be documented as follows:
kernel is non-zero).
SME can also be enabled and activated in the BIOS. If SME is enabled and
-activated in the BIOS, then all memory accesses will be encrypted and it will
-not be necessary to activate the Linux memory encryption support. If the BIOS
-merely enables SME (sets bit 23 of the MSR_AMD64_SYSCFG), then Linux can activate
-memory encryption by default (CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT=y) or
-by supplying mem_encrypt=on on the kernel command line. However, if BIOS does
-not enable SME, then Linux will not be able to activate memory encryption, even
-if configured to do so by default or the mem_encrypt=on command line parameter
-is specified.
+activated in the BIOS, then all memory accesses will be encrypted and it
+will not be necessary to activate the Linux memory encryption support.
+
+If the BIOS merely enables SME (sets bit 23 of the MSR_AMD64_SYSCFG),
+then memory encryption can be enabled by supplying mem_encrypt=on on the
+kernel command line. However, if BIOS does not enable SME, then Linux
+will not be able to activate memory encryption, even if configured to do
+so by default or the mem_encrypt=on command line parameter is specified.
Secure Nested Paging (SNP)
==========================
@@ -130,4 +130,149 @@ SNP feature support.
More details in AMD64 APM[1] Vol 2: 15.34.10 SEV_STATUS MSR
-[1] https://www.amd.com/content/dam/amd/en/documents/processor-tech-docs/programmer-references/24593.pdf
+Reverse Map Table (RMP)
+=======================
+
+The RMP is a structure in system memory that is used to ensure a one-to-one
+mapping between system physical addresses and guest physical addresses. Each
+page of memory that is potentially assignable to guests has one entry within
+the RMP.
+
+The RMP table can be either contiguous in memory or a collection of segments
+in memory.
+
+Contiguous RMP
+--------------
+
+Support for this form of the RMP is present when support for SEV-SNP is
+present, which can be determined using the CPUID instruction::
+
+ 0x8000001f[eax]:
+ Bit[4] indicates support for SEV-SNP
+
+The location of the RMP is identified to the hardware through two MSRs::
+
+ 0xc0010132 (RMP_BASE):
+ System physical address of the first byte of the RMP
+
+ 0xc0010133 (RMP_END):
+ System physical address of the last byte of the RMP
+
+Hardware requires that RMP_BASE and (RPM_END + 1) be 8KB aligned, but SEV
+firmware increases the alignment requirement to require a 1MB alignment.
+
+The RMP consists of a 16KB region used for processor bookkeeping followed
+by the RMP entries, which are 16 bytes in size. The size of the RMP
+determines the range of physical memory that the hypervisor can assign to
+SEV-SNP guests. The RMP covers the system physical address from::
+
+ 0 to ((RMP_END + 1 - RMP_BASE - 16KB) / 16B) x 4KB.
+
+The current Linux support relies on BIOS to allocate/reserve the memory for
+the RMP and to set RMP_BASE and RMP_END appropriately. Linux uses the MSR
+values to locate the RMP and determine the size of the RMP. The RMP must
+cover all of system memory in order for Linux to enable SEV-SNP.
+
+Segmented RMP
+-------------
+
+Segmented RMP support is a new way of representing the layout of an RMP.
+Initial RMP support required the RMP table to be contiguous in memory.
+RMP accesses from a NUMA node on which the RMP doesn't reside
+can take longer than accesses from a NUMA node on which the RMP resides.
+Segmented RMP support allows the RMP entries to be located on the same
+node as the memory the RMP is covering, potentially reducing latency
+associated with accessing an RMP entry associated with the memory. Each
+RMP segment covers a specific range of system physical addresses.
+
+Support for this form of the RMP can be determined using the CPUID
+instruction::
+
+ 0x8000001f[eax]:
+ Bit[23] indicates support for segmented RMP
+
+If supported, segmented RMP attributes can be found using the CPUID
+instruction::
+
+ 0x80000025[eax]:
+ Bits[5:0] minimum supported RMP segment size
+ Bits[11:6] maximum supported RMP segment size
+
+ 0x80000025[ebx]:
+ Bits[9:0] number of cacheable RMP segment definitions
+ Bit[10] indicates if the number of cacheable RMP segments
+ is a hard limit
+
+To enable a segmented RMP, a new MSR is available::
+
+ 0xc0010136 (RMP_CFG):
+ Bit[0] indicates if segmented RMP is enabled
+ Bits[13:8] contains the size of memory covered by an RMP
+ segment (expressed as a power of 2)
+
+The RMP segment size defined in the RMP_CFG MSR applies to all segments
+of the RMP. Therefore each RMP segment covers a specific range of system
+physical addresses. For example, if the RMP_CFG MSR value is 0x2401, then
+the RMP segment coverage value is 0x24 => 36, meaning the size of memory
+covered by an RMP segment is 64GB (1 << 36). So the first RMP segment
+covers physical addresses from 0 to 0xF_FFFF_FFFF, the second RMP segment
+covers physical addresses from 0x10_0000_0000 to 0x1F_FFFF_FFFF, etc.
+
+When a segmented RMP is enabled, RMP_BASE points to the RMP bookkeeping
+area as it does today (16K in size). However, instead of RMP entries
+beginning immediately after the bookkeeping area, there is a 4K RMP
+segment table (RST). Each entry in the RST is 8-bytes in size and represents
+an RMP segment::
+
+ Bits[19:0] mapped size (in GB)
+ The mapped size can be less than the defined segment size.
+ A value of zero, indicates that no RMP exists for the range
+ of system physical addresses associated with this segment.
+ Bits[51:20] segment physical address
+ This address is left shift 20-bits (or just masked when
+ read) to form the physical address of the segment (1MB
+ alignment).
+
+The RST can hold 512 segment entries but can be limited in size to the number
+of cacheable RMP segments (CPUID 0x80000025_EBX[9:0]) if the number of cacheable
+RMP segments is a hard limit (CPUID 0x80000025_EBX[10]).
+
+The current Linux support relies on BIOS to allocate/reserve the memory for
+the segmented RMP (the bookkeeping area, RST, and all segments), build the RST
+and to set RMP_BASE, RMP_END, and RMP_CFG appropriately. Linux uses the MSR
+values to locate the RMP and determine the size and location of the RMP
+segments. The RMP must cover all of system memory in order for Linux to enable
+SEV-SNP.
+
+More details in the AMD64 APM Vol 2, section "15.36.3 Reverse Map Table",
+docID: 24593.
+
+Secure VM Service Module (SVSM)
+===============================
+
+SNP provides a feature called Virtual Machine Privilege Levels (VMPL) which
+defines four privilege levels at which guest software can run. The most
+privileged level is 0 and numerically higher numbers have lesser privileges.
+More details in the AMD64 APM Vol 2, section "15.35.7 Virtual Machine
+Privilege Levels", docID: 24593.
+
+When using that feature, different services can run at different protection
+levels, apart from the guest OS but still within the secure SNP environment.
+They can provide services to the guest, like a vTPM, for example.
+
+When a guest is not running at VMPL0, it needs to communicate with the software
+running at VMPL0 to perform privileged operations or to interact with secure
+services. An example fur such a privileged operation is PVALIDATE which is
+*required* to be executed at VMPL0.
+
+In this scenario, the software running at VMPL0 is usually called a Secure VM
+Service Module (SVSM). Discovery of an SVSM and the API used to communicate
+with it is documented in "Secure VM Service Module for SEV-SNP Guests", docID:
+58019.
+
+(Latest versions of the above-mentioned documents can be found by using
+a search engine like duckduckgo.com and typing in:
+
+ site:amd.com "Secure VM Service Module for SEV-SNP Guests", docID: 58019
+
+for example.)
diff --git a/Documentation/arch/x86/amd_hsmp.rst b/Documentation/arch/x86/amd_hsmp.rst
index c92bfd55359f..a094f55c10b0 100644
--- a/Documentation/arch/x86/amd_hsmp.rst
+++ b/Documentation/arch/x86/amd_hsmp.rst
@@ -4,8 +4,9 @@
AMD HSMP interface
============================================
-Newer Fam19h EPYC server line of processors from AMD support system
-management functionality via HSMP (Host System Management Port).
+Newer Fam19h(model 0x00-0x1f, 0x30-0x3f, 0x90-0x9f, 0xa0-0xaf),
+Fam1Ah(model 0x00-0x1f) EPYC server line of processors from AMD support
+system management functionality via HSMP (Host System Management Port).
The Host System Management Port (HSMP) is an interface to provide
OS-level software with access to system management functions via a
@@ -13,16 +14,28 @@ set of mailbox registers.
More details on the interface can be found in chapter
"7 Host System Management Port (HSMP)" of the family/model PPR
-Eg: https://www.amd.com/system/files/TechDocs/55898_B1_pub_0.50.zip
+Eg: https://www.amd.com/content/dam/amd/en/documents/epyc-technical-docs/programmer-references/55898_B1_pub_0_50.zip
-HSMP interface is supported on EPYC server CPU models only.
+
+HSMP interface is supported on EPYC line of server CPUs and MI300A (APU).
HSMP device
============================================
-amd_hsmp driver under the drivers/platforms/x86/ creates miscdevice
-/dev/hsmp to let user space programs run hsmp mailbox commands.
+amd_hsmp driver under drivers/platforms/x86/amd/hsmp/ has separate driver files
+for ACPI object based probing, platform device based probing and for the common
+code for these two drivers.
+
+Kconfig option CONFIG_AMD_HSMP_PLAT compiles plat.c and creates amd_hsmp.ko.
+Kconfig option CONFIG_AMD_HSMP_ACPI compiles acpi.c and creates hsmp_acpi.ko.
+Selecting any of these two configs automatically selects CONFIG_AMD_HSMP. This
+compiles common code hsmp.c and creates hsmp_common.ko module.
+
+Both the ACPI and plat drivers create the miscdevice /dev/hsmp to let
+user space programs run hsmp mailbox commands.
+
+The ACPI object format supported by the driver is defined below.
$ ls -al /dev/hsmp
crw-r--r-- 1 root root 10, 123 Jan 21 21:41 /dev/hsmp
@@ -58,6 +71,81 @@ Note: lseek() is not supported as entire metrics table is read.
Metrics table definitions will be documented as part of Public PPR.
The same is defined in the amd_hsmp.h header.
+2. HSMP telemetry sysfs files
+
+Following sysfs files are available at /sys/devices/platform/AMDI0097:0X/.
+
+* c0_residency_input: Percentage of cores in C0 state.
+* prochot_status: Reports 1 if the processor is at thermal threshold value,
+ 0 otherwise.
+* smu_fw_version: SMU firmware version.
+* protocol_version: HSMP interface version.
+* ddr_max_bw: Theoretical maximum DDR bandwidth in GB/s.
+* ddr_utilised_bw_input: Current utilized DDR bandwidth in GB/s.
+* ddr_utilised_bw_perc_input(%): Percentage of current utilized DDR bandwidth.
+* mclk_input: Memory clock in MHz.
+* fclk_input: Fabric clock in MHz.
+* clk_fmax: Maximum frequency of socket in MHz.
+* clk_fmin: Minimum frequency of socket in MHz.
+* cclk_freq_limit_input: Core clock frequency limit per socket in MHz.
+* pwr_current_active_freq_limit: Current active frequency limit of socket
+ in MHz.
+* pwr_current_active_freq_limit_source: Source of current active frequency
+ limit.
+
+ACPI device object format
+=========================
+The ACPI object format expected from the amd_hsmp driver
+for socket with ID00 is given below::
+
+ Device(HSMP)
+ {
+ Name(_HID, "AMDI0097")
+ Name(_UID, "ID00")
+ Name(HSE0, 0x00000001)
+ Name(RBF0, ResourceTemplate()
+ {
+ Memory32Fixed(ReadWrite, 0xxxxxxx, 0x00100000)
+ })
+ Method(_CRS, 0, NotSerialized)
+ {
+ Return(RBF0)
+ }
+ Method(_STA, 0, NotSerialized)
+ {
+ If(LEqual(HSE0, One))
+ {
+ Return(0x0F)
+ }
+ Else
+ {
+ Return(Zero)
+ }
+ }
+ Name(_DSD, Package(2)
+ {
+ Buffer(0x10)
+ {
+ 0x9D, 0x61, 0x4D, 0xB7, 0x07, 0x57, 0xBD, 0x48,
+ 0xA6, 0x9F, 0x4E, 0xA2, 0x87, 0x1F, 0xC2, 0xF6
+ },
+ Package(3)
+ {
+ Package(2) {"MsgIdOffset", 0x00010934},
+ Package(2) {"MsgRspOffset", 0x00010980},
+ Package(2) {"MsgArgOffset", 0x000109E0}
+ }
+ })
+ }
+
+HSMP HWMON interface
+====================
+HSMP power sensors are registered with the hwmon interface. A separate hwmon
+directory is created for each socket and the following files are generated
+within the hwmon directory.
+- power1_input (read only)
+- power1_cap_max (read only)
+- power1_cap (read, write)
An example
==========
@@ -97,8 +185,8 @@ what happened. The transaction returns 0 on success.
More details on the interface and message definitions can be found in chapter
"7 Host System Management Port (HSMP)" of the respective family/model PPR
-eg: https://www.amd.com/system/files/TechDocs/55898_B1_pub_0.50.zip
+eg: https://www.amd.com/content/dam/amd/en/documents/epyc-technical-docs/programmer-references/55898_B1_pub_0_50.zip
User space C-APIs are made available by linking against the esmi library,
-which is provided by the E-SMS project https://developer.amd.com/e-sms/.
+which is provided by the E-SMS project https://www.amd.com/en/developer/e-sms.html.
See: https://github.com/amd/esmi_ib_library
diff --git a/Documentation/arch/x86/boot.rst b/Documentation/arch/x86/boot.rst
index c513855a54bb..6d36ce86fd8e 100644
--- a/Documentation/arch/x86/boot.rst
+++ b/Documentation/arch/x86/boot.rst
@@ -77,7 +77,7 @@ Protocol 2.14 BURNT BY INCORRECT COMMIT
Protocol 2.15 (Kernel 5.5) Added the kernel_info and kernel_info.setup_type_max.
============= ============================================================
- .. note::
+.. note::
The protocol version number should be changed only if the setup header
is changed. There is no need to update the version number if boot_params
or kernel_info are changed. Additionally, it is recommended to use
@@ -95,27 +95,27 @@ Memory Layout
The traditional memory map for the kernel loader, used for Image or
zImage kernels, typically looks like::
- | |
- 0A0000 +------------------------+
- | Reserved for BIOS | Do not use. Reserved for BIOS EBDA.
- 09A000 +------------------------+
- | Command line |
- | Stack/heap | For use by the kernel real-mode code.
- 098000 +------------------------+
- | Kernel setup | The kernel real-mode code.
- 090200 +------------------------+
- | Kernel boot sector | The kernel legacy boot sector.
- 090000 +------------------------+
- | Protected-mode kernel | The bulk of the kernel image.
- 010000 +------------------------+
- | Boot loader | <- Boot sector entry point 0000:7C00
- 001000 +------------------------+
- | Reserved for MBR/BIOS |
- 000800 +------------------------+
- | Typically used by MBR |
- 000600 +------------------------+
- | BIOS use only |
- 000000 +------------------------+
+ | |
+ 0A0000 +------------------------+
+ | Reserved for BIOS | Do not use. Reserved for BIOS EBDA.
+ 09A000 +------------------------+
+ | Command line |
+ | Stack/heap | For use by the kernel real-mode code.
+ 098000 +------------------------+
+ | Kernel setup | The kernel real-mode code.
+ 090200 +------------------------+
+ | Kernel boot sector | The kernel legacy boot sector.
+ 090000 +------------------------+
+ | Protected-mode kernel | The bulk of the kernel image.
+ 010000 +------------------------+
+ | Boot loader | <- Boot sector entry point 0000:7C00
+ 001000 +------------------------+
+ | Reserved for MBR/BIOS |
+ 000800 +------------------------+
+ | Typically used by MBR |
+ 000600 +------------------------+
+ | BIOS use only |
+ 000000 +------------------------+
When using bzImage, the protected-mode kernel was relocated to
0x100000 ("high memory"), and the kernel real-mode block (boot sector,
@@ -142,28 +142,28 @@ above the 0x9A000 point; too many BIOSes will break above that point.
For a modern bzImage kernel with boot protocol version >= 2.02, a
memory layout like the following is suggested::
- ~ ~
- | Protected-mode kernel |
- 100000 +------------------------+
- | I/O memory hole |
- 0A0000 +------------------------+
- | Reserved for BIOS | Leave as much as possible unused
- ~ ~
- | Command line | (Can also be below the X+10000 mark)
- X+10000 +------------------------+
- | Stack/heap | For use by the kernel real-mode code.
- X+08000 +------------------------+
- | Kernel setup | The kernel real-mode code.
- | Kernel boot sector | The kernel legacy boot sector.
- X +------------------------+
- | Boot loader | <- Boot sector entry point 0000:7C00
- 001000 +------------------------+
- | Reserved for MBR/BIOS |
- 000800 +------------------------+
- | Typically used by MBR |
- 000600 +------------------------+
- | BIOS use only |
- 000000 +------------------------+
+ ~ ~
+ | Protected-mode kernel |
+ 100000 +------------------------+
+ | I/O memory hole |
+ 0A0000 +------------------------+
+ | Reserved for BIOS | Leave as much as possible unused
+ ~ ~
+ | Command line | (Can also be below the X+10000 mark)
+ X+10000 +------------------------+
+ | Stack/heap | For use by the kernel real-mode code.
+ X+08000 +------------------------+
+ | Kernel setup | The kernel real-mode code.
+ | Kernel boot sector | The kernel legacy boot sector.
+ X +------------------------+
+ | Boot loader | <- Boot sector entry point 0000:7C00
+ 001000 +------------------------+
+ | Reserved for MBR/BIOS |
+ 000800 +------------------------+
+ | Typically used by MBR |
+ 000600 +------------------------+
+ | BIOS use only |
+ 000000 +------------------------+
... where the address X is as low as the design of the boot loader permits.
@@ -229,22 +229,22 @@ Offset/Size Proto Name Meaning
=========== ======== ===================== ============================================
.. note::
- (1) For backwards compatibility, if the setup_sects field contains 0, the
- real value is 4.
+ (1) For backwards compatibility, if the setup_sects field contains 0,
+ the real value is 4.
- (2) For boot protocol prior to 2.04, the upper two bytes of the syssize
- field are unusable, which means the size of a bzImage kernel
- cannot be determined.
+ (2) For boot protocol prior to 2.04, the upper two bytes of the syssize
+ field are unusable, which means the size of a bzImage kernel
+ cannot be determined.
- (3) Ignored, but safe to set, for boot protocols 2.02-2.09.
+ (3) Ignored, but safe to set, for boot protocols 2.02-2.09.
If the "HdrS" (0x53726448) magic number is not found at offset 0x202,
the boot protocol version is "old". Loading an old kernel, the
following parameters should be assumed::
- Image type = zImage
- initrd not supported
- Real-mode kernel must be located at 0x90000.
+ Image type = zImage
+ initrd not supported
+ Real-mode kernel must be located at 0x90000.
Otherwise, the "version" field contains the protocol version,
e.g. protocol version 2.01 will contain 0x0201 in this field. When
@@ -265,7 +265,7 @@ All general purpose boot loaders should write the fields marked
nonstandard address should fill in the fields marked (reloc); other
boot loaders can ignore those fields.
-The byte order of all fields is littleendian (this is x86, after all.)
+The byte order of all fields is little endian (this is x86, after all.)
============ ===========
Field name: setup_sects
@@ -365,7 +365,7 @@ Offset/size: 0x206/2
Protocol: 2.00+
============ =======
- Contains the boot protocol version, in (major << 8)+minor format,
+ Contains the boot protocol version, in (major << 8) + minor format,
e.g. 0x0204 for version 2.04, and 0x0a11 for a hypothetical version
10.17.
@@ -397,17 +397,17 @@ Protocol: 2.00+
If set to a nonzero value, contains a pointer to a NUL-terminated
human-readable kernel version number string, less 0x200. This can
be used to display the kernel version to the user. This value
- should be less than (0x200*setup_sects).
+ should be less than (0x200 * setup_sects).
For example, if this value is set to 0x1c00, the kernel version
number string can be found at offset 0x1e00 in the kernel file.
This is a valid value if and only if the "setup_sects" field
contains the value 15 or higher, as::
- 0x1c00 < 15*0x200 (= 0x1e00) but
- 0x1c00 >= 14*0x200 (= 0x1c00)
+ 0x1c00 < 15 * 0x200 (= 0x1e00) but
+ 0x1c00 >= 14 * 0x200 (= 0x1c00)
- 0x1c00 >> 9 = 14, So the minimum value for setup_secs is 15.
+ 0x1c00 >> 9 = 14, So the minimum value for setup_secs is 15.
============ ==================
Field name: type_of_loader
@@ -416,7 +416,7 @@ Offset/size: 0x210/1
Protocol: 2.00+
============ ==================
- If your boot loader has an assigned id (see table below), enter
+ If your boot loader has an assigned ID (see table below), enter
0xTV here, where T is an identifier for the boot loader and V is
a version number. Otherwise, enter 0xFF here.
@@ -427,35 +427,35 @@ Protocol: 2.00+
For example, for T = 0x15, V = 0x234, write::
- type_of_loader <- 0xE4
- ext_loader_type <- 0x05
- ext_loader_ver <- 0x23
+ type_of_loader <- 0xE4
+ ext_loader_type <- 0x05
+ ext_loader_ver <- 0x23
- Assigned boot loader ids (hexadecimal):
+ Assigned boot loader IDs:
== =======================================
- 0 LILO
- (0x00 reserved for pre-2.00 bootloader)
- 1 Loadlin
- 2 bootsect-loader
- (0x20, all other values reserved)
- 3 Syslinux
- 4 Etherboot/gPXE/iPXE
- 5 ELILO
- 7 GRUB
- 8 U-Boot
- 9 Xen
- A Gujin
- B Qemu
- C Arcturus Networks uCbootloader
- D kexec-tools
- E Extended (see ext_loader_type)
- F Special (0xFF = undefined)
- 10 Reserved
- 11 Minimal Linux Bootloader
- <http://sebastian-plotz.blogspot.de>
- 12 OVMF UEFI virtualization stack
- 13 barebox
+ 0x0 LILO
+ (0x00 reserved for pre-2.00 bootloader)
+ 0x1 Loadlin
+ 0x2 bootsect-loader
+ (0x20, all other values reserved)
+ 0x3 Syslinux
+ 0x4 Etherboot/gPXE/iPXE
+ 0x5 ELILO
+ 0x7 GRUB
+ 0x8 U-Boot
+ 0x9 Xen
+ 0xA Gujin
+ 0xB Qemu
+ 0xC Arcturus Networks uCbootloader
+ 0xD kexec-tools
+ 0xE Extended (see ext_loader_type)
+ 0xF Special (0xFF = undefined)
+ 0x10 Reserved
+ 0x11 Minimal Linux Bootloader
+ <http://sebastian-plotz.blogspot.de>
+ 0x12 OVMF UEFI virtualization stack
+ 0x13 barebox
== =======================================
Please contact <hpa@zytor.com> if you need a bootloader ID value assigned.
@@ -686,7 +686,7 @@ Protocol: 2.10+
If a boot loader makes use of this field, it should update the
kernel_alignment field with the alignment unit desired; typically::
- kernel_alignment = 1 << min_alignment
+ kernel_alignment = 1 << min_alignment;
There may be a considerable performance cost with an excessively
misaligned kernel. Therefore, a loader should typically try each
@@ -754,7 +754,7 @@ Protocol: 2.07+
0x00000000 The default x86/PC environment
0x00000001 lguest
0x00000002 Xen
- 0x00000003 Moorestown MID
+ 0x00000003 Intel MID (Moorestown, CloverTrail, Merrifield, Moorefield)
0x00000004 CE4100 TV Platform
========== ==============================
@@ -808,13 +808,13 @@ Protocol: 2.09+
parameters passing mechanism. The definition of struct setup_data is
as follow::
- struct setup_data {
- u64 next;
- u32 type;
- u32 len;
- u8 data[0];
- };
-
+ struct setup_data {
+ __u64 next;
+ __u32 type;
+ __u32 len;
+ __u8 data[];
+ }
+
Where, the next is a 64-bit physical pointer to the next node of
linked list, the next field of the last node is 0; the type is used
to identify the contents of data; the len is the length of data
@@ -834,12 +834,12 @@ Protocol: 2.09+
Thus setup_indirect struct and SETUP_INDIRECT type were introduced in
protocol 2.15::
- struct setup_indirect {
- __u32 type;
- __u32 reserved; /* Reserved, must be set to zero. */
- __u64 len;
- __u64 addr;
- };
+ struct setup_indirect {
+ __u32 type;
+ __u32 reserved; /* Reserved, must be set to zero. */
+ __u64 len;
+ __u64 addr;
+ };
The type member is a SETUP_INDIRECT | SETUP_* type. However, it cannot be
SETUP_INDIRECT itself since making the setup_indirect a tree structure
@@ -849,17 +849,17 @@ Protocol: 2.09+
Let's give an example how to point to SETUP_E820_EXT data using setup_indirect.
In this case setup_data and setup_indirect will look like this::
- struct setup_data {
- __u64 next = 0 or <addr_of_next_setup_data_struct>;
- __u32 type = SETUP_INDIRECT;
- __u32 len = sizeof(setup_indirect);
- __u8 data[sizeof(setup_indirect)] = struct setup_indirect {
- __u32 type = SETUP_INDIRECT | SETUP_E820_EXT;
- __u32 reserved = 0;
- __u64 len = <len_of_SETUP_E820_EXT_data>;
- __u64 addr = <addr_of_SETUP_E820_EXT_data>;
- }
- }
+ struct setup_data {
+ .next = 0, /* or <addr_of_next_setup_data_struct> */
+ .type = SETUP_INDIRECT,
+ .len = sizeof(setup_indirect),
+ .data[sizeof(setup_indirect)] = (struct setup_indirect) {
+ .type = SETUP_INDIRECT | SETUP_E820_EXT,
+ .reserved = 0,
+ .len = <len_of_SETUP_E820_EXT_data>,
+ .addr = <addr_of_SETUP_E820_EXT_data>,
+ },
+ }
.. note::
SETUP_INDIRECT | SETUP_NONE objects cannot be properly distinguished
@@ -878,7 +878,8 @@ Protocol: 2.10+
address if possible.
A non-relocatable kernel will unconditionally move itself and to run
- at this address.
+ at this address. A relocatable kernel will move itself to this address if it
+ loaded below this address.
============ =======
Field name: init_size
@@ -895,10 +896,19 @@ Offset/size: 0x260/4
The kernel runtime start address is determined by the following algorithm::
- if (relocatable_kernel)
- runtime_start = align_up(load_address, kernel_alignment)
- else
- runtime_start = pref_address
+ if (relocatable_kernel) {
+ if (load_address < pref_address)
+ load_address = pref_address;
+ runtime_start = align_up(load_address, kernel_alignment);
+ } else {
+ runtime_start = pref_address;
+ }
+
+Hence the necessary memory window location and size can be estimated by
+a boot loader as::
+
+ memory_window_start = runtime_start;
+ memory_window_size = init_size;
============ ===============
Field name: handover_offset
@@ -928,12 +938,12 @@ The kernel_info
===============
The relationships between the headers are analogous to the various data
-sections:
+sections::
setup_header = .data
boot_params/setup_data = .bss
-What is missing from the above list? That's right:
+What is missing from the above list? That's right::
kernel_info = .rodata
@@ -965,22 +975,22 @@ after kernel_info_var_len_data label. Each chunk of variable size data has to
be prefixed with header/magic and its size, e.g.::
kernel_info:
- .ascii "LToP" /* Header, Linux top (structure). */
- .long kernel_info_var_len_data - kernel_info
- .long kernel_info_end - kernel_info
- .long 0x01234567 /* Some fixed size data for the bootloaders. */
+ .ascii "LToP" /* Header, Linux top (structure). */
+ .long kernel_info_var_len_data - kernel_info
+ .long kernel_info_end - kernel_info
+ .long 0x01234567 /* Some fixed size data for the bootloaders. */
kernel_info_var_len_data:
- example_struct: /* Some variable size data for the bootloaders. */
- .ascii "0123" /* Header/Magic. */
- .long example_struct_end - example_struct
- .ascii "Struct"
- .long 0x89012345
+ example_struct: /* Some variable size data for the bootloaders. */
+ .ascii "0123" /* Header/Magic. */
+ .long example_struct_end - example_struct
+ .ascii "Struct"
+ .long 0x89012345
example_struct_end:
- example_strings: /* Some variable size data for the bootloaders. */
- .ascii "ABCD" /* Header/Magic. */
- .long example_strings_end - example_strings
- .asciz "String_0"
- .asciz "String_1"
+ example_strings: /* Some variable size data for the bootloaders. */
+ .ascii "ABCD" /* Header/Magic. */
+ .long example_strings_end - example_strings
+ .asciz "String_0"
+ .asciz "String_1"
example_strings_end:
kernel_info_end:
@@ -1028,16 +1038,6 @@ Offset/size: 0x000c/4
This field contains maximal allowed type for setup_data and setup_indirect structs.
-The Image Checksum
-==================
-
-From boot protocol version 2.08 onwards the CRC-32 is calculated over
-the entire file using the characteristic polynomial 0x04C11DB7 and an
-initial remainder of 0xffffffff. The checksum is appended to the
-file; therefore the CRC of the file up to the limit specified in the
-syssize field of the header is always 0.
-
-
The Kernel Command Line
=======================
@@ -1129,67 +1129,63 @@ mode segment.
Such a boot loader should enter the following fields in the header::
- unsigned long base_ptr; /* base address for real-mode segment */
-
- if ( setup_sects == 0 ) {
- setup_sects = 4;
- }
+ unsigned long base_ptr; /* base address for real-mode segment */
- if ( protocol >= 0x0200 ) {
- type_of_loader = <type code>;
- if ( loading_initrd ) {
- ramdisk_image = <initrd_address>;
- ramdisk_size = <initrd_size>;
- }
+ if (setup_sects == 0)
+ setup_sects = 4;
- if ( protocol >= 0x0202 && loadflags & 0x01 )
- heap_end = 0xe000;
- else
- heap_end = 0x9800;
+ if (protocol >= 0x0200) {
+ type_of_loader = <type code>;
+ if (loading_initrd) {
+ ramdisk_image = <initrd_address>;
+ ramdisk_size = <initrd_size>;
+ }
- if ( protocol >= 0x0201 ) {
- heap_end_ptr = heap_end - 0x200;
- loadflags |= 0x80; /* CAN_USE_HEAP */
- }
+ if (protocol >= 0x0202 && loadflags & 0x01)
+ heap_end = 0xe000;
+ else
+ heap_end = 0x9800;
- if ( protocol >= 0x0202 ) {
- cmd_line_ptr = base_ptr + heap_end;
- strcpy(cmd_line_ptr, cmdline);
- } else {
- cmd_line_magic = 0xA33F;
- cmd_line_offset = heap_end;
- setup_move_size = heap_end + strlen(cmdline)+1;
- strcpy(base_ptr+cmd_line_offset, cmdline);
- }
- } else {
- /* Very old kernel */
+ if (protocol >= 0x0201) {
+ heap_end_ptr = heap_end - 0x200;
+ loadflags |= 0x80; /* CAN_USE_HEAP */
+ }
- heap_end = 0x9800;
+ if (protocol >= 0x0202) {
+ cmd_line_ptr = base_ptr + heap_end;
+ strcpy(cmd_line_ptr, cmdline);
+ } else {
+ cmd_line_magic = 0xA33F;
+ cmd_line_offset = heap_end;
+ setup_move_size = heap_end + strlen(cmdline) + 1;
+ strcpy(base_ptr + cmd_line_offset, cmdline);
+ }
+ } else {
+ /* Very old kernel */
- cmd_line_magic = 0xA33F;
- cmd_line_offset = heap_end;
+ heap_end = 0x9800;
- /* A very old kernel MUST have its real-mode code
- loaded at 0x90000 */
+ cmd_line_magic = 0xA33F;
+ cmd_line_offset = heap_end;
- if ( base_ptr != 0x90000 ) {
- /* Copy the real-mode kernel */
- memcpy(0x90000, base_ptr, (setup_sects+1)*512);
- base_ptr = 0x90000; /* Relocated */
- }
+ /* A very old kernel MUST have its real-mode code loaded at 0x90000 */
+ if (base_ptr != 0x90000) {
+ /* Copy the real-mode kernel */
+ memcpy(0x90000, base_ptr, (setup_sects + 1) * 512);
+ base_ptr = 0x90000; /* Relocated */
+ }
- strcpy(0x90000+cmd_line_offset, cmdline);
+ strcpy(0x90000 + cmd_line_offset, cmdline);
- /* It is recommended to clear memory up to the 32K mark */
- memset(0x90000 + (setup_sects+1)*512, 0,
- (64-(setup_sects+1))*512);
- }
+ /* It is recommended to clear memory up to the 32K mark */
+ memset(0x90000 + (setup_sects + 1) * 512, 0, (64 - (setup_sects + 1)) * 512);
+ }
Loading The Rest of The Kernel
==============================
-The 32-bit (non-real-mode) kernel starts at offset (setup_sects+1)*512
+The 32-bit (non-real-mode) kernel starts at offset (setup_sects + 1) * 512
in the kernel file (again, if setup_sects == 0 the real value is 4.)
It should be loaded at address 0x10000 for Image/zImage kernels and
0x100000 for bzImage kernels.
@@ -1197,13 +1193,14 @@ It should be loaded at address 0x10000 for Image/zImage kernels and
The kernel is a bzImage kernel if the protocol >= 2.00 and the 0x01
bit (LOAD_HIGH) in the loadflags field is set::
- is_bzImage = (protocol >= 0x0200) && (loadflags & 0x01);
- load_address = is_bzImage ? 0x100000 : 0x10000;
+ is_bzImage = (protocol >= 0x0200) && (loadflags & 0x01);
+ load_address = is_bzImage ? 0x100000 : 0x10000;
-Note that Image/zImage kernels can be up to 512K in size, and thus use
-the entire 0x10000-0x90000 range of memory. This means it is pretty
-much a requirement for these kernels to load the real-mode part at
-0x90000. bzImage kernels allow much more flexibility.
+.. note::
+ Image/zImage kernels can be up to 512K in size, and thus use the entire
+ 0x10000-0x90000 range of memory. This means it is pretty much a
+ requirement for these kernels to load the real-mode part at 0x90000.
+ bzImage kernels allow much more flexibility.
Special Command Line Options
============================
@@ -1272,19 +1269,20 @@ es = ss.
In our example from above, we would do::
- /* Note: in the case of the "old" kernel protocol, base_ptr must
- be == 0x90000 at this point; see the previous sample code */
-
- seg = base_ptr >> 4;
+ /*
+ * Note: in the case of the "old" kernel protocol, base_ptr must
+ * be == 0x90000 at this point; see the previous sample code.
+ */
+ seg = base_ptr >> 4;
- cli(); /* Enter with interrupts disabled! */
+ cli(); /* Enter with interrupts disabled! */
- /* Set up the real-mode kernel stack */
- _SS = seg;
- _SP = heap_end;
+ /* Set up the real-mode kernel stack */
+ _SS = seg;
+ _SP = heap_end;
- _DS = _ES = _FS = _GS = seg;
- jmp_far(seg+0x20, 0); /* Run the kernel */
+ _DS = _ES = _FS = _GS = seg;
+ jmp_far(seg + 0x20, 0); /* Run the kernel */
If your boot sector accesses a floppy drive, it is recommended to
switch off the floppy motor before running the kernel, since the
@@ -1339,7 +1337,7 @@ from offset 0x01f1 of kernel image on should be loaded into struct
boot_params and examined. The end of setup header can be calculated as
follow::
- 0x0202 + byte value at offset 0x0201
+ 0x0202 + byte value at offset 0x0201
In addition to read/modify/write the setup header of the struct
boot_params as that of 16-bit boot protocol, the boot loader should
@@ -1375,7 +1373,7 @@ Then, the setup header at offset 0x01f1 of kernel image on should be
loaded into struct boot_params and examined. The end of setup header
can be calculated as follows::
- 0x0202 + byte value at offset 0x0201
+ 0x0202 + byte value at offset 0x0201
In addition to read/modify/write the setup header of the struct
boot_params as that of 16-bit boot protocol, the boot loader should
@@ -1417,7 +1415,7 @@ execution context provided by the EFI firmware.
The function prototype for the handover entry point looks like this::
- efi_stub_entry(void *handle, efi_system_table_t *table, struct boot_params *bp)
+ void efi_stub_entry(void *handle, efi_system_table_t *table, struct boot_params *bp);
'handle' is the EFI image handle passed to the boot loader by the EFI
firmware, 'table' is the EFI system table - these are the first two
@@ -1432,12 +1430,35 @@ The boot loader *must* fill out the following fields in bp::
All other fields should be zero.
-NOTE: The EFI Handover Protocol is deprecated in favour of the ordinary PE/COFF
- entry point, combined with the LINUX_EFI_INITRD_MEDIA_GUID based initrd
- loading protocol (refer to [0] for an example of the bootloader side of
- this), which removes the need for any knowledge on the part of the EFI
- bootloader regarding the internal representation of boot_params or any
- requirements/limitations regarding the placement of the command line
- and ramdisk in memory, or the placement of the kernel image itself.
+.. note::
+ The EFI Handover Protocol is deprecated in favour of the ordinary PE/COFF
+ entry point described below.
+
+.. _pe-coff-entry-point:
+
+PE/COFF entry point
+===================
+
+When compiled with ``CONFIG_EFI_STUB=y``, the kernel can be executed as a
+regular PE/COFF binary. See Documentation/admin-guide/efi-stub.rst for
+implementation details.
+
+The stub loader can request the initrd via a UEFI protocol. For this to work,
+the firmware or bootloader needs to register a handle which carries
+implementations of the ``EFI_LOAD_FILE2`` protocol and the device path
+protocol exposing the ``LINUX_EFI_INITRD_MEDIA_GUID`` vendor media device path.
+In this case, a kernel booting via the EFI stub will invoke
+``LoadFile2::LoadFile()`` method on the registered protocol to instruct the
+firmware to load the initrd into a memory location chosen by the kernel/EFI
+stub.
+
+This approach removes the need for any knowledge on the part of the EFI
+bootloader regarding the internal representation of boot_params or any
+requirements/limitations regarding the placement of the command line and
+ramdisk in memory, or the placement of the kernel image itself.
+
+For sample implementations, refer to `the original u-boot implementation`_ or
+`the OVMF implementation`_.
-[0] https://github.com/u-boot/u-boot/commit/ec80b4735a593961fe701cc3a5d717d4739b0fd0
+.. _the original u-boot implementation: https://github.com/u-boot/u-boot/commit/ec80b4735a593961fe701cc3a5d717d4739b0fd0
+.. _the OVMF implementation: https://github.com/tianocore/edk2/blob/1780373897f12c25075f8883e073144506441168/OvmfPkg/LinuxInitrdDynamicShellCommand/LinuxInitrdDynamicShellCommand.c
diff --git a/Documentation/arch/x86/buslock.rst b/Documentation/arch/x86/buslock.rst
index 4c5a4822eeb7..31f1bfdff16f 100644
--- a/Documentation/arch/x86/buslock.rst
+++ b/Documentation/arch/x86/buslock.rst
@@ -26,7 +26,8 @@ Detection
=========
Intel processors may support either or both of the following hardware
-mechanisms to detect split locks and bus locks.
+mechanisms to detect split locks and bus locks. Some AMD processors also
+support bus lock detect.
#AC exception for split lock detection
--------------------------------------
diff --git a/Documentation/arch/x86/cpuinfo.rst b/Documentation/arch/x86/cpuinfo.rst
index 8895784d4784..9f2e47c4b1c8 100644
--- a/Documentation/arch/x86/cpuinfo.rst
+++ b/Documentation/arch/x86/cpuinfo.rst
@@ -11,7 +11,7 @@ The list of feature flags in /proc/cpuinfo is not complete and
represents an ill-fated attempt from long time ago to put feature flags
in an easy to find place for userspace.
-However, the amount of feature flags is growing by the CPU generation,
+However, the number of feature flags is growing with each CPU generation,
leading to unparseable and unwieldy /proc/cpuinfo.
What is more, those feature flags do not even need to be in that file
@@ -79,8 +79,9 @@ feature flags.
How are feature flags created?
==============================
-a: Feature flags can be derived from the contents of CPUID leaves.
-------------------------------------------------------------------
+Feature flags can be derived from the contents of CPUID leaves
+--------------------------------------------------------------
+
These feature definitions are organized mirroring the layout of CPUID
leaves and grouped in words with offsets as mapped in enum cpuid_leafs
in cpufeatures.h (see arch/x86/include/asm/cpufeatures.h for details).
@@ -89,8 +90,9 @@ cpufeatures.h, and if it is detected at run time, the flags will be
displayed accordingly in /proc/cpuinfo. For example, the flag "avx2"
comes from X86_FEATURE_AVX2 in cpufeatures.h.
-b: Flags can be from scattered CPUID-based features.
-----------------------------------------------------
+Flags can be from scattered CPUID-based features
+------------------------------------------------
+
Hardware features enumerated in sparsely populated CPUID leaves get
software-defined values. Still, CPUID needs to be queried to determine
if a given feature is present. This is done in init_scattered_cpuid_features().
@@ -104,18 +106,19 @@ has only one feature and would waste 31 bits of space in the x86_capability[]
array. Since there is a struct cpuinfo_x86 for each possible CPU, the wasted
memory is not trivial.
-c: Flags can be created synthetically under certain conditions for hardware features.
--------------------------------------------------------------------------------------
+Flags can be created synthetically under certain conditions for hardware features
+---------------------------------------------------------------------------------
+
Examples of conditions include whether certain features are present in
MSR_IA32_CORE_CAPS or specific CPU models are identified. If the needed
conditions are met, the features are enabled by the set_cpu_cap or
setup_force_cpu_cap macros. For example, if bit 5 is set in MSR_IA32_CORE_CAPS,
the feature X86_FEATURE_SPLIT_LOCK_DETECT will be enabled and
"split_lock_detect" will be displayed. The flag "ring3mwait" will be
-displayed only when running on INTEL_FAM6_XEON_PHI_[KNL|KNM] processors.
+displayed only when running on INTEL_XEON_PHI_[KNL|KNM] processors.
-d: Flags can represent purely software features.
-------------------------------------------------
+Flags can represent purely software features
+--------------------------------------------
These flags do not represent hardware features. Instead, they represent a
software feature implemented in the kernel. For example, Kernel Page Table
Isolation is purely software feature and its feature flag X86_FEATURE_PTI is
@@ -130,14 +133,18 @@ x86_cap/bug_flags[] arrays in kernel/cpu/capflags.c. The names in the
resulting x86_cap/bug_flags[] are used to populate /proc/cpuinfo. The naming
of flags in the x86_cap/bug_flags[] are as follows:
-a: The name of the flag is from the string in X86_FEATURE_<name> by default.
-----------------------------------------------------------------------------
-By default, the flag <name> in /proc/cpuinfo is extracted from the respective
-X86_FEATURE_<name> in cpufeatures.h. For example, the flag "avx2" is from
-X86_FEATURE_AVX2.
+Flags do not appear by default in /proc/cpuinfo
+-----------------------------------------------
+
+Feature flags are omitted by default from /proc/cpuinfo as it does not make
+sense for the feature to be exposed to userspace in most cases. For example,
+X86_FEATURE_ALWAYS is defined in cpufeatures.h but that flag is an internal
+kernel feature used in the alternative runtime patching functionality. So the
+flag does not appear in /proc/cpuinfo.
+
+Specify a flag name if absolutely needed
+----------------------------------------
-b: The naming can be overridden.
---------------------------------
If the comment on the line for the #define X86_FEATURE_* starts with a
double-quote character (""), the string inside the double-quote characters
will be the name of the flags. For example, the flag "sse4_1" comes from
@@ -148,36 +155,31 @@ needed. For instance, /proc/cpuinfo is a userspace interface and must remain
constant. If, for some reason, the naming of X86_FEATURE_<name> changes, one
shall override the new naming with the name already used in /proc/cpuinfo.
-c: The naming override can be "", which means it will not appear in /proc/cpuinfo.
-----------------------------------------------------------------------------------
-The feature shall be omitted from /proc/cpuinfo if it does not make sense for
-the feature to be exposed to userspace. For example, X86_FEATURE_ALWAYS is
-defined in cpufeatures.h but that flag is an internal kernel feature used
-in the alternative runtime patching functionality. So, its name is overridden
-with "". Its flag will not appear in /proc/cpuinfo.
-
Flags are missing when one or more of these happen
==================================================
-a: The hardware does not enumerate support for it.
---------------------------------------------------
+The hardware does not enumerate support for it
+----------------------------------------------
+
For example, when a new kernel is running on old hardware or the feature is
not enabled by boot firmware. Even if the hardware is new, there might be a
problem enabling the feature at run time, the flag will not be displayed.
-b: The kernel does not know about the flag.
--------------------------------------------
+The kernel does not know about the flag
+---------------------------------------
+
For example, when an old kernel is running on new hardware.
-c: The kernel disabled support for it at compile-time.
-------------------------------------------------------
-For example, if 5-level-paging is not enabled when building (i.e.,
-CONFIG_X86_5LEVEL is not selected) the flag "la57" will not show up [#f1]_.
+The kernel disabled support for it at compile-time
+--------------------------------------------------
+
+For example, if Linear Address Masking (LAM) is not enabled when building (i.e.,
+CONFIG_ADDRESS_MASKING is not selected) the flag "lam" will not show up.
Even though the feature will still be detected via CPUID, the kernel disables
-it by clearing via setup_clear_cpu_cap(X86_FEATURE_LA57).
+it by clearing via setup_clear_cpu_cap(X86_FEATURE_LAM).
-d: The feature is disabled at boot-time.
-----------------------------------------
+The feature is disabled at boot-time
+------------------------------------
A feature can be disabled either using a command-line parameter or because
it failed to be enabled. The command-line parameter clearcpuid= can be used
to disable features using the feature number as defined in
@@ -190,12 +192,11 @@ disable specific features. The list of parameters includes, but is not limited
to, nofsgsbase, nosgx, noxsave, etc. 5-level paging can also be disabled using
"no5lvl".
-e: The feature was known to be non-functional.
-----------------------------------------------
+The feature was known to be non-functional
+------------------------------------------
+
The feature was known to be non-functional because a dependency was
missing at runtime. For example, AVX flags will not show up if XSAVE feature
is disabled since they depend on XSAVE feature. Another example would be broken
CPUs and them missing microcode patches. Due to that, the kernel decides not to
enable a feature.
-
-.. [#f1] 5-level paging uses linear address of 57 bits.
diff --git a/Documentation/arch/x86/exception-tables.rst b/Documentation/arch/x86/exception-tables.rst
index efde1fef4fbd..6e7177363f8f 100644
--- a/Documentation/arch/x86/exception-tables.rst
+++ b/Documentation/arch/x86/exception-tables.rst
@@ -297,7 +297,7 @@ vma occurs?
c) execution continues at local label 2 (address of the
instruction immediately after the faulting user access).
-The steps 8a to 8c in a certain way emulate the faulting instruction.
+ The steps a to c above in a certain way emulate the faulting instruction.
That's it, mostly. If you look at our example, you might ask why
we set EAX to -EFAULT in the exception handler code. Well, the
diff --git a/Documentation/arch/x86/index.rst b/Documentation/arch/x86/index.rst
index 8ac64d7de4dc..f88bcfceb7f2 100644
--- a/Documentation/arch/x86/index.rst
+++ b/Documentation/arch/x86/index.rst
@@ -25,13 +25,14 @@ x86-specific Documentation
shstk
iommu
intel_txt
+ amd-debugging
amd-memory-encryption
amd_hsmp
+ amd-hfi
tdx
pti
mds
microcode
- resctrl
tsx_async_abort
buslock
usb-legacy-support
diff --git a/Documentation/arch/x86/mds.rst b/Documentation/arch/x86/mds.rst
index e73fdff62c0a..3518671e1a85 100644
--- a/Documentation/arch/x86/mds.rst
+++ b/Documentation/arch/x86/mds.rst
@@ -93,7 +93,10 @@ enters a C-state.
The kernel provides a function to invoke the buffer clearing:
- mds_clear_cpu_buffers()
+ x86_clear_cpu_buffers()
+
+Also macro CLEAR_CPU_BUFFERS can be used in ASM late in exit-to-user path.
+Other than CFLAGS.ZF, this macro doesn't clobber any registers.
The mitigation is invoked on kernel/userspace, hypervisor/guest and C-state
(idle) transitions.
@@ -138,17 +141,30 @@ Mitigation points
When transitioning from kernel to user space the CPU buffers are flushed
on affected CPUs when the mitigation is not disabled on the kernel
- command line. The migitation is enabled through the static key
- mds_user_clear.
-
- The mitigation is invoked in prepare_exit_to_usermode() which covers
- all but one of the kernel to user space transitions. The exception
- is when we return from a Non Maskable Interrupt (NMI), which is
- handled directly in do_nmi().
-
- (The reason that NMI is special is that prepare_exit_to_usermode() can
- enable IRQs. In NMI context, NMIs are blocked, and we don't want to
- enable IRQs with NMIs blocked.)
+ command line. The mitigation is enabled through the feature flag
+ X86_FEATURE_CLEAR_CPU_BUF.
+
+ The mitigation is invoked just before transitioning to userspace after
+ user registers are restored. This is done to minimize the window in
+ which kernel data could be accessed after VERW e.g. via an NMI after
+ VERW.
+
+ **Corner case not handled**
+ Interrupts returning to kernel don't clear CPUs buffers since the
+ exit-to-user path is expected to do that anyways. But, there could be
+ a case when an NMI is generated in kernel after the exit-to-user path
+ has cleared the buffers. This case is not handled and NMI returning to
+ kernel don't clear CPU buffers because:
+
+ 1. It is rare to get an NMI after VERW, but before returning to userspace.
+ 2. For an unprivileged user, there is no known way to make that NMI
+ less rare or target it.
+ 3. It would take a large number of these precisely-timed NMIs to mount
+ an actual attack. There's presumably not enough bandwidth.
+ 4. The NMI in question occurs after a VERW, i.e. when user state is
+ restored and most interesting data is already scrubbed. What's left
+ is only the data that NMI touches, and that may or may not be of
+ any interest.
2. C-State transition
@@ -169,9 +185,9 @@ Mitigation points
idle clearing would be a window dressing exercise and is therefore not
activated.
- The invocation is controlled by the static key mds_idle_clear which is
- switched depending on the chosen mitigation mode and the SMT state of
- the system.
+ The invocation is controlled by the static key cpu_buf_idle_clear which is
+ switched depending on the chosen mitigation mode and the SMT state of the
+ system.
The buffer clear is only invoked before entering the C-State to prevent
that stale data from the idling CPU from spilling to the Hyper-Thread
diff --git a/Documentation/arch/x86/pti.rst b/Documentation/arch/x86/pti.rst
index e08d35177bc0..57e8392f61d3 100644
--- a/Documentation/arch/x86/pti.rst
+++ b/Documentation/arch/x86/pti.rst
@@ -26,9 +26,9 @@ comments in pti.c).
This approach helps to ensure that side-channel attacks leveraging
the paging structures do not function when PTI is enabled. It can be
-enabled by setting CONFIG_PAGE_TABLE_ISOLATION=y at compile time.
-Once enabled at compile-time, it can be disabled at boot with the
-'nopti' or 'pti=' kernel parameters (see kernel-parameters.txt).
+enabled by setting CONFIG_MITIGATION_PAGE_TABLE_ISOLATION=y at compile
+time. Once enabled at compile-time, it can be disabled at boot with
+the 'nopti' or 'pti=' kernel parameters (see kernel-parameters.txt).
Page Table Management
=====================
diff --git a/Documentation/arch/x86/resctrl.rst b/Documentation/arch/x86/resctrl.rst
deleted file mode 100644
index a6279df64a9d..000000000000
--- a/Documentation/arch/x86/resctrl.rst
+++ /dev/null
@@ -1,1480 +0,0 @@
-.. SPDX-License-Identifier: GPL-2.0
-.. include:: <isonum.txt>
-
-===========================================
-User Interface for Resource Control feature
-===========================================
-
-:Copyright: |copy| 2016 Intel Corporation
-:Authors: - Fenghua Yu <fenghua.yu@intel.com>
- - Tony Luck <tony.luck@intel.com>
- - Vikas Shivappa <vikas.shivappa@intel.com>
-
-
-Intel refers to this feature as Intel Resource Director Technology(Intel(R) RDT).
-AMD refers to this feature as AMD Platform Quality of Service(AMD QoS).
-
-This feature is enabled by the CONFIG_X86_CPU_RESCTRL and the x86 /proc/cpuinfo
-flag bits:
-
-=============================================== ================================
-RDT (Resource Director Technology) Allocation "rdt_a"
-CAT (Cache Allocation Technology) "cat_l3", "cat_l2"
-CDP (Code and Data Prioritization) "cdp_l3", "cdp_l2"
-CQM (Cache QoS Monitoring) "cqm_llc", "cqm_occup_llc"
-MBM (Memory Bandwidth Monitoring) "cqm_mbm_total", "cqm_mbm_local"
-MBA (Memory Bandwidth Allocation) "mba"
-SMBA (Slow Memory Bandwidth Allocation) ""
-BMEC (Bandwidth Monitoring Event Configuration) ""
-=============================================== ================================
-
-Historically, new features were made visible by default in /proc/cpuinfo. This
-resulted in the feature flags becoming hard to parse by humans. Adding a new
-flag to /proc/cpuinfo should be avoided if user space can obtain information
-about the feature from resctrl's info directory.
-
-To use the feature mount the file system::
-
- # mount -t resctrl resctrl [-o cdp[,cdpl2][,mba_MBps][,debug]] /sys/fs/resctrl
-
-mount options are:
-
-"cdp":
- Enable code/data prioritization in L3 cache allocations.
-"cdpl2":
- Enable code/data prioritization in L2 cache allocations.
-"mba_MBps":
- Enable the MBA Software Controller(mba_sc) to specify MBA
- bandwidth in MBps
-"debug":
- Make debug files accessible. Available debug files are annotated with
- "Available only with debug option".
-
-L2 and L3 CDP are controlled separately.
-
-RDT features are orthogonal. A particular system may support only
-monitoring, only control, or both monitoring and control. Cache
-pseudo-locking is a unique way of using cache control to "pin" or
-"lock" data in the cache. Details can be found in
-"Cache Pseudo-Locking".
-
-
-The mount succeeds if either of allocation or monitoring is present, but
-only those files and directories supported by the system will be created.
-For more details on the behavior of the interface during monitoring
-and allocation, see the "Resource alloc and monitor groups" section.
-
-Info directory
-==============
-
-The 'info' directory contains information about the enabled
-resources. Each resource has its own subdirectory. The subdirectory
-names reflect the resource names.
-
-Each subdirectory contains the following files with respect to
-allocation:
-
-Cache resource(L3/L2) subdirectory contains the following files
-related to allocation:
-
-"num_closids":
- The number of CLOSIDs which are valid for this
- resource. The kernel uses the smallest number of
- CLOSIDs of all enabled resources as limit.
-"cbm_mask":
- The bitmask which is valid for this resource.
- This mask is equivalent to 100%.
-"min_cbm_bits":
- The minimum number of consecutive bits which
- must be set when writing a mask.
-
-"shareable_bits":
- Bitmask of shareable resource with other executing
- entities (e.g. I/O). User can use this when
- setting up exclusive cache partitions. Note that
- some platforms support devices that have their
- own settings for cache use which can over-ride
- these bits.
-"bit_usage":
- Annotated capacity bitmasks showing how all
- instances of the resource are used. The legend is:
-
- "0":
- Corresponding region is unused. When the system's
- resources have been allocated and a "0" is found
- in "bit_usage" it is a sign that resources are
- wasted.
-
- "H":
- Corresponding region is used by hardware only
- but available for software use. If a resource
- has bits set in "shareable_bits" but not all
- of these bits appear in the resource groups'
- schematas then the bits appearing in
- "shareable_bits" but no resource group will
- be marked as "H".
- "X":
- Corresponding region is available for sharing and
- used by hardware and software. These are the
- bits that appear in "shareable_bits" as
- well as a resource group's allocation.
- "S":
- Corresponding region is used by software
- and available for sharing.
- "E":
- Corresponding region is used exclusively by
- one resource group. No sharing allowed.
- "P":
- Corresponding region is pseudo-locked. No
- sharing allowed.
-"sparse_masks":
- Indicates if non-contiguous 1s value in CBM is supported.
-
- "0":
- Only contiguous 1s value in CBM is supported.
- "1":
- Non-contiguous 1s value in CBM is supported.
-
-Memory bandwidth(MB) subdirectory contains the following files
-with respect to allocation:
-
-"min_bandwidth":
- The minimum memory bandwidth percentage which
- user can request.
-
-"bandwidth_gran":
- The granularity in which the memory bandwidth
- percentage is allocated. The allocated
- b/w percentage is rounded off to the next
- control step available on the hardware. The
- available bandwidth control steps are:
- min_bandwidth + N * bandwidth_gran.
-
-"delay_linear":
- Indicates if the delay scale is linear or
- non-linear. This field is purely informational
- only.
-
-"thread_throttle_mode":
- Indicator on Intel systems of how tasks running on threads
- of a physical core are throttled in cases where they
- request different memory bandwidth percentages:
-
- "max":
- the smallest percentage is applied
- to all threads
- "per-thread":
- bandwidth percentages are directly applied to
- the threads running on the core
-
-If RDT monitoring is available there will be an "L3_MON" directory
-with the following files:
-
-"num_rmids":
- The number of RMIDs available. This is the
- upper bound for how many "CTRL_MON" + "MON"
- groups can be created.
-
-"mon_features":
- Lists the monitoring events if
- monitoring is enabled for the resource.
- Example::
-
- # cat /sys/fs/resctrl/info/L3_MON/mon_features
- llc_occupancy
- mbm_total_bytes
- mbm_local_bytes
-
- If the system supports Bandwidth Monitoring Event
- Configuration (BMEC), then the bandwidth events will
- be configurable. The output will be::
-
- # cat /sys/fs/resctrl/info/L3_MON/mon_features
- llc_occupancy
- mbm_total_bytes
- mbm_total_bytes_config
- mbm_local_bytes
- mbm_local_bytes_config
-
-"mbm_total_bytes_config", "mbm_local_bytes_config":
- Read/write files containing the configuration for the mbm_total_bytes
- and mbm_local_bytes events, respectively, when the Bandwidth
- Monitoring Event Configuration (BMEC) feature is supported.
- The event configuration settings are domain specific and affect
- all the CPUs in the domain. When either event configuration is
- changed, the bandwidth counters for all RMIDs of both events
- (mbm_total_bytes as well as mbm_local_bytes) are cleared for that
- domain. The next read for every RMID will report "Unavailable"
- and subsequent reads will report the valid value.
-
- Following are the types of events supported:
-
- ==== ========================================================
- Bits Description
- ==== ========================================================
- 6 Dirty Victims from the QOS domain to all types of memory
- 5 Reads to slow memory in the non-local NUMA domain
- 4 Reads to slow memory in the local NUMA domain
- 3 Non-temporal writes to non-local NUMA domain
- 2 Non-temporal writes to local NUMA domain
- 1 Reads to memory in the non-local NUMA domain
- 0 Reads to memory in the local NUMA domain
- ==== ========================================================
-
- By default, the mbm_total_bytes configuration is set to 0x7f to count
- all the event types and the mbm_local_bytes configuration is set to
- 0x15 to count all the local memory events.
-
- Examples:
-
- * To view the current configuration::
- ::
-
- # cat /sys/fs/resctrl/info/L3_MON/mbm_total_bytes_config
- 0=0x7f;1=0x7f;2=0x7f;3=0x7f
-
- # cat /sys/fs/resctrl/info/L3_MON/mbm_local_bytes_config
- 0=0x15;1=0x15;3=0x15;4=0x15
-
- * To change the mbm_total_bytes to count only reads on domain 0,
- the bits 0, 1, 4 and 5 needs to be set, which is 110011b in binary
- (in hexadecimal 0x33):
- ::
-
- # echo "0=0x33" > /sys/fs/resctrl/info/L3_MON/mbm_total_bytes_config
-
- # cat /sys/fs/resctrl/info/L3_MON/mbm_total_bytes_config
- 0=0x33;1=0x7f;2=0x7f;3=0x7f
-
- * To change the mbm_local_bytes to count all the slow memory reads on
- domain 0 and 1, the bits 4 and 5 needs to be set, which is 110000b
- in binary (in hexadecimal 0x30):
- ::
-
- # echo "0=0x30;1=0x30" > /sys/fs/resctrl/info/L3_MON/mbm_local_bytes_config
-
- # cat /sys/fs/resctrl/info/L3_MON/mbm_local_bytes_config
- 0=0x30;1=0x30;3=0x15;4=0x15
-
-"max_threshold_occupancy":
- Read/write file provides the largest value (in
- bytes) at which a previously used LLC_occupancy
- counter can be considered for re-use.
-
-Finally, in the top level of the "info" directory there is a file
-named "last_cmd_status". This is reset with every "command" issued
-via the file system (making new directories or writing to any of the
-control files). If the command was successful, it will read as "ok".
-If the command failed, it will provide more information that can be
-conveyed in the error returns from file operations. E.g.
-::
-
- # echo L3:0=f7 > schemata
- bash: echo: write error: Invalid argument
- # cat info/last_cmd_status
- mask f7 has non-consecutive 1-bits
-
-Resource alloc and monitor groups
-=================================
-
-Resource groups are represented as directories in the resctrl file
-system. The default group is the root directory which, immediately
-after mounting, owns all the tasks and cpus in the system and can make
-full use of all resources.
-
-On a system with RDT control features additional directories can be
-created in the root directory that specify different amounts of each
-resource (see "schemata" below). The root and these additional top level
-directories are referred to as "CTRL_MON" groups below.
-
-On a system with RDT monitoring the root directory and other top level
-directories contain a directory named "mon_groups" in which additional
-directories can be created to monitor subsets of tasks in the CTRL_MON
-group that is their ancestor. These are called "MON" groups in the rest
-of this document.
-
-Removing a directory will move all tasks and cpus owned by the group it
-represents to the parent. Removing one of the created CTRL_MON groups
-will automatically remove all MON groups below it.
-
-Moving MON group directories to a new parent CTRL_MON group is supported
-for the purpose of changing the resource allocations of a MON group
-without impacting its monitoring data or assigned tasks. This operation
-is not allowed for MON groups which monitor CPUs. No other move
-operation is currently allowed other than simply renaming a CTRL_MON or
-MON group.
-
-All groups contain the following files:
-
-"tasks":
- Reading this file shows the list of all tasks that belong to
- this group. Writing a task id to the file will add a task to the
- group. Multiple tasks can be added by separating the task ids
- with commas. Tasks will be assigned sequentially. Multiple
- failures are not supported. A single failure encountered while
- attempting to assign a task will cause the operation to abort and
- already added tasks before the failure will remain in the group.
- Failures will be logged to /sys/fs/resctrl/info/last_cmd_status.
-
- If the group is a CTRL_MON group the task is removed from
- whichever previous CTRL_MON group owned the task and also from
- any MON group that owned the task. If the group is a MON group,
- then the task must already belong to the CTRL_MON parent of this
- group. The task is removed from any previous MON group.
-
-
-"cpus":
- Reading this file shows a bitmask of the logical CPUs owned by
- this group. Writing a mask to this file will add and remove
- CPUs to/from this group. As with the tasks file a hierarchy is
- maintained where MON groups may only include CPUs owned by the
- parent CTRL_MON group.
- When the resource group is in pseudo-locked mode this file will
- only be readable, reflecting the CPUs associated with the
- pseudo-locked region.
-
-
-"cpus_list":
- Just like "cpus", only using ranges of CPUs instead of bitmasks.
-
-
-When control is enabled all CTRL_MON groups will also contain:
-
-"schemata":
- A list of all the resources available to this group.
- Each resource has its own line and format - see below for details.
-
-"size":
- Mirrors the display of the "schemata" file to display the size in
- bytes of each allocation instead of the bits representing the
- allocation.
-
-"mode":
- The "mode" of the resource group dictates the sharing of its
- allocations. A "shareable" resource group allows sharing of its
- allocations while an "exclusive" resource group does not. A
- cache pseudo-locked region is created by first writing
- "pseudo-locksetup" to the "mode" file before writing the cache
- pseudo-locked region's schemata to the resource group's "schemata"
- file. On successful pseudo-locked region creation the mode will
- automatically change to "pseudo-locked".
-
-"ctrl_hw_id":
- Available only with debug option. The identifier used by hardware
- for the control group. On x86 this is the CLOSID.
-
-When monitoring is enabled all MON groups will also contain:
-
-"mon_data":
- This contains a set of files organized by L3 domain and by
- RDT event. E.g. on a system with two L3 domains there will
- be subdirectories "mon_L3_00" and "mon_L3_01". Each of these
- directories have one file per event (e.g. "llc_occupancy",
- "mbm_total_bytes", and "mbm_local_bytes"). In a MON group these
- files provide a read out of the current value of the event for
- all tasks in the group. In CTRL_MON groups these files provide
- the sum for all tasks in the CTRL_MON group and all tasks in
- MON groups. Please see example section for more details on usage.
-
-"mon_hw_id":
- Available only with debug option. The identifier used by hardware
- for the monitor group. On x86 this is the RMID.
-
-Resource allocation rules
--------------------------
-
-When a task is running the following rules define which resources are
-available to it:
-
-1) If the task is a member of a non-default group, then the schemata
- for that group is used.
-
-2) Else if the task belongs to the default group, but is running on a
- CPU that is assigned to some specific group, then the schemata for the
- CPU's group is used.
-
-3) Otherwise the schemata for the default group is used.
-
-Resource monitoring rules
--------------------------
-1) If a task is a member of a MON group, or non-default CTRL_MON group
- then RDT events for the task will be reported in that group.
-
-2) If a task is a member of the default CTRL_MON group, but is running
- on a CPU that is assigned to some specific group, then the RDT events
- for the task will be reported in that group.
-
-3) Otherwise RDT events for the task will be reported in the root level
- "mon_data" group.
-
-
-Notes on cache occupancy monitoring and control
-===============================================
-When moving a task from one group to another you should remember that
-this only affects *new* cache allocations by the task. E.g. you may have
-a task in a monitor group showing 3 MB of cache occupancy. If you move
-to a new group and immediately check the occupancy of the old and new
-groups you will likely see that the old group is still showing 3 MB and
-the new group zero. When the task accesses locations still in cache from
-before the move, the h/w does not update any counters. On a busy system
-you will likely see the occupancy in the old group go down as cache lines
-are evicted and re-used while the occupancy in the new group rises as
-the task accesses memory and loads into the cache are counted based on
-membership in the new group.
-
-The same applies to cache allocation control. Moving a task to a group
-with a smaller cache partition will not evict any cache lines. The
-process may continue to use them from the old partition.
-
-Hardware uses CLOSid(Class of service ID) and an RMID(Resource monitoring ID)
-to identify a control group and a monitoring group respectively. Each of
-the resource groups are mapped to these IDs based on the kind of group. The
-number of CLOSid and RMID are limited by the hardware and hence the creation of
-a "CTRL_MON" directory may fail if we run out of either CLOSID or RMID
-and creation of "MON" group may fail if we run out of RMIDs.
-
-max_threshold_occupancy - generic concepts
-------------------------------------------
-
-Note that an RMID once freed may not be immediately available for use as
-the RMID is still tagged the cache lines of the previous user of RMID.
-Hence such RMIDs are placed on limbo list and checked back if the cache
-occupancy has gone down. If there is a time when system has a lot of
-limbo RMIDs but which are not ready to be used, user may see an -EBUSY
-during mkdir.
-
-max_threshold_occupancy is a user configurable value to determine the
-occupancy at which an RMID can be freed.
-
-Schemata files - general concepts
----------------------------------
-Each line in the file describes one resource. The line starts with
-the name of the resource, followed by specific values to be applied
-in each of the instances of that resource on the system.
-
-Cache IDs
----------
-On current generation systems there is one L3 cache per socket and L2
-caches are generally just shared by the hyperthreads on a core, but this
-isn't an architectural requirement. We could have multiple separate L3
-caches on a socket, multiple cores could share an L2 cache. So instead
-of using "socket" or "core" to define the set of logical cpus sharing
-a resource we use a "Cache ID". At a given cache level this will be a
-unique number across the whole system (but it isn't guaranteed to be a
-contiguous sequence, there may be gaps). To find the ID for each logical
-CPU look in /sys/devices/system/cpu/cpu*/cache/index*/id
-
-Cache Bit Masks (CBM)
----------------------
-For cache resources we describe the portion of the cache that is available
-for allocation using a bitmask. The maximum value of the mask is defined
-by each cpu model (and may be different for different cache levels). It
-is found using CPUID, but is also provided in the "info" directory of
-the resctrl file system in "info/{resource}/cbm_mask". Some Intel hardware
-requires that these masks have all the '1' bits in a contiguous block. So
-0x3, 0x6 and 0xC are legal 4-bit masks with two bits set, but 0x5, 0x9
-and 0xA are not. Check /sys/fs/resctrl/info/{resource}/sparse_masks
-if non-contiguous 1s value is supported. On a system with a 20-bit mask
-each bit represents 5% of the capacity of the cache. You could partition
-the cache into four equal parts with masks: 0x1f, 0x3e0, 0x7c00, 0xf8000.
-
-Memory bandwidth Allocation and monitoring
-==========================================
-
-For Memory bandwidth resource, by default the user controls the resource
-by indicating the percentage of total memory bandwidth.
-
-The minimum bandwidth percentage value for each cpu model is predefined
-and can be looked up through "info/MB/min_bandwidth". The bandwidth
-granularity that is allocated is also dependent on the cpu model and can
-be looked up at "info/MB/bandwidth_gran". The available bandwidth
-control steps are: min_bw + N * bw_gran. Intermediate values are rounded
-to the next control step available on the hardware.
-
-The bandwidth throttling is a core specific mechanism on some of Intel
-SKUs. Using a high bandwidth and a low bandwidth setting on two threads
-sharing a core may result in both threads being throttled to use the
-low bandwidth (see "thread_throttle_mode").
-
-The fact that Memory bandwidth allocation(MBA) may be a core
-specific mechanism where as memory bandwidth monitoring(MBM) is done at
-the package level may lead to confusion when users try to apply control
-via the MBA and then monitor the bandwidth to see if the controls are
-effective. Below are such scenarios:
-
-1. User may *not* see increase in actual bandwidth when percentage
- values are increased:
-
-This can occur when aggregate L2 external bandwidth is more than L3
-external bandwidth. Consider an SKL SKU with 24 cores on a package and
-where L2 external is 10GBps (hence aggregate L2 external bandwidth is
-240GBps) and L3 external bandwidth is 100GBps. Now a workload with '20
-threads, having 50% bandwidth, each consuming 5GBps' consumes the max L3
-bandwidth of 100GBps although the percentage value specified is only 50%
-<< 100%. Hence increasing the bandwidth percentage will not yield any
-more bandwidth. This is because although the L2 external bandwidth still
-has capacity, the L3 external bandwidth is fully used. Also note that
-this would be dependent on number of cores the benchmark is run on.
-
-2. Same bandwidth percentage may mean different actual bandwidth
- depending on # of threads:
-
-For the same SKU in #1, a 'single thread, with 10% bandwidth' and '4
-thread, with 10% bandwidth' can consume upto 10GBps and 40GBps although
-they have same percentage bandwidth of 10%. This is simply because as
-threads start using more cores in an rdtgroup, the actual bandwidth may
-increase or vary although user specified bandwidth percentage is same.
-
-In order to mitigate this and make the interface more user friendly,
-resctrl added support for specifying the bandwidth in MBps as well. The
-kernel underneath would use a software feedback mechanism or a "Software
-Controller(mba_sc)" which reads the actual bandwidth using MBM counters
-and adjust the memory bandwidth percentages to ensure::
-
- "actual bandwidth < user specified bandwidth".
-
-By default, the schemata would take the bandwidth percentage values
-where as user can switch to the "MBA software controller" mode using
-a mount option 'mba_MBps'. The schemata format is specified in the below
-sections.
-
-L3 schemata file details (code and data prioritization disabled)
-----------------------------------------------------------------
-With CDP disabled the L3 schemata format is::
-
- L3:<cache_id0>=<cbm>;<cache_id1>=<cbm>;...
-
-L3 schemata file details (CDP enabled via mount option to resctrl)
-------------------------------------------------------------------
-When CDP is enabled L3 control is split into two separate resources
-so you can specify independent masks for code and data like this::
-
- L3DATA:<cache_id0>=<cbm>;<cache_id1>=<cbm>;...
- L3CODE:<cache_id0>=<cbm>;<cache_id1>=<cbm>;...
-
-L2 schemata file details
-------------------------
-CDP is supported at L2 using the 'cdpl2' mount option. The schemata
-format is either::
-
- L2:<cache_id0>=<cbm>;<cache_id1>=<cbm>;...
-
-or
-
- L2DATA:<cache_id0>=<cbm>;<cache_id1>=<cbm>;...
- L2CODE:<cache_id0>=<cbm>;<cache_id1>=<cbm>;...
-
-
-Memory bandwidth Allocation (default mode)
-------------------------------------------
-
-Memory b/w domain is L3 cache.
-::
-
- MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;...
-
-Memory bandwidth Allocation specified in MBps
----------------------------------------------
-
-Memory bandwidth domain is L3 cache.
-::
-
- MB:<cache_id0>=bw_MBps0;<cache_id1>=bw_MBps1;...
-
-Slow Memory Bandwidth Allocation (SMBA)
----------------------------------------
-AMD hardware supports Slow Memory Bandwidth Allocation (SMBA).
-CXL.memory is the only supported "slow" memory device. With the
-support of SMBA, the hardware enables bandwidth allocation on
-the slow memory devices. If there are multiple such devices in
-the system, the throttling logic groups all the slow sources
-together and applies the limit on them as a whole.
-
-The presence of SMBA (with CXL.memory) is independent of slow memory
-devices presence. If there are no such devices on the system, then
-configuring SMBA will have no impact on the performance of the system.
-
-The bandwidth domain for slow memory is L3 cache. Its schemata file
-is formatted as:
-::
-
- SMBA:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;...
-
-Reading/writing the schemata file
----------------------------------
-Reading the schemata file will show the state of all resources
-on all domains. When writing you only need to specify those values
-which you wish to change. E.g.
-::
-
- # cat schemata
- L3DATA:0=fffff;1=fffff;2=fffff;3=fffff
- L3CODE:0=fffff;1=fffff;2=fffff;3=fffff
- # echo "L3DATA:2=3c0;" > schemata
- # cat schemata
- L3DATA:0=fffff;1=fffff;2=3c0;3=fffff
- L3CODE:0=fffff;1=fffff;2=fffff;3=fffff
-
-Reading/writing the schemata file (on AMD systems)
---------------------------------------------------
-Reading the schemata file will show the current bandwidth limit on all
-domains. The allocated resources are in multiples of one eighth GB/s.
-When writing to the file, you need to specify what cache id you wish to
-configure the bandwidth limit.
-
-For example, to allocate 2GB/s limit on the first cache id:
-
-::
-
- # cat schemata
- MB:0=2048;1=2048;2=2048;3=2048
- L3:0=ffff;1=ffff;2=ffff;3=ffff
-
- # echo "MB:1=16" > schemata
- # cat schemata
- MB:0=2048;1= 16;2=2048;3=2048
- L3:0=ffff;1=ffff;2=ffff;3=ffff
-
-Reading/writing the schemata file (on AMD systems) with SMBA feature
---------------------------------------------------------------------
-Reading and writing the schemata file is the same as without SMBA in
-above section.
-
-For example, to allocate 8GB/s limit on the first cache id:
-
-::
-
- # cat schemata
- SMBA:0=2048;1=2048;2=2048;3=2048
- MB:0=2048;1=2048;2=2048;3=2048
- L3:0=ffff;1=ffff;2=ffff;3=ffff
-
- # echo "SMBA:1=64" > schemata
- # cat schemata
- SMBA:0=2048;1= 64;2=2048;3=2048
- MB:0=2048;1=2048;2=2048;3=2048
- L3:0=ffff;1=ffff;2=ffff;3=ffff
-
-Cache Pseudo-Locking
-====================
-CAT enables a user to specify the amount of cache space that an
-application can fill. Cache pseudo-locking builds on the fact that a
-CPU can still read and write data pre-allocated outside its current
-allocated area on a cache hit. With cache pseudo-locking, data can be
-preloaded into a reserved portion of cache that no application can
-fill, and from that point on will only serve cache hits. The cache
-pseudo-locked memory is made accessible to user space where an
-application can map it into its virtual address space and thus have
-a region of memory with reduced average read latency.
-
-The creation of a cache pseudo-locked region is triggered by a request
-from the user to do so that is accompanied by a schemata of the region
-to be pseudo-locked. The cache pseudo-locked region is created as follows:
-
-- Create a CAT allocation CLOSNEW with a CBM matching the schemata
- from the user of the cache region that will contain the pseudo-locked
- memory. This region must not overlap with any current CAT allocation/CLOS
- on the system and no future overlap with this cache region is allowed
- while the pseudo-locked region exists.
-- Create a contiguous region of memory of the same size as the cache
- region.
-- Flush the cache, disable hardware prefetchers, disable preemption.
-- Make CLOSNEW the active CLOS and touch the allocated memory to load
- it into the cache.
-- Set the previous CLOS as active.
-- At this point the closid CLOSNEW can be released - the cache
- pseudo-locked region is protected as long as its CBM does not appear in
- any CAT allocation. Even though the cache pseudo-locked region will from
- this point on not appear in any CBM of any CLOS an application running with
- any CLOS will be able to access the memory in the pseudo-locked region since
- the region continues to serve cache hits.
-- The contiguous region of memory loaded into the cache is exposed to
- user-space as a character device.
-
-Cache pseudo-locking increases the probability that data will remain
-in the cache via carefully configuring the CAT feature and controlling
-application behavior. There is no guarantee that data is placed in
-cache. Instructions like INVD, WBINVD, CLFLUSH, etc. can still evict
-“locked” data from cache. Power management C-states may shrink or
-power off cache. Deeper C-states will automatically be restricted on
-pseudo-locked region creation.
-
-It is required that an application using a pseudo-locked region runs
-with affinity to the cores (or a subset of the cores) associated
-with the cache on which the pseudo-locked region resides. A sanity check
-within the code will not allow an application to map pseudo-locked memory
-unless it runs with affinity to cores associated with the cache on which the
-pseudo-locked region resides. The sanity check is only done during the
-initial mmap() handling, there is no enforcement afterwards and the
-application self needs to ensure it remains affine to the correct cores.
-
-Pseudo-locking is accomplished in two stages:
-
-1) During the first stage the system administrator allocates a portion
- of cache that should be dedicated to pseudo-locking. At this time an
- equivalent portion of memory is allocated, loaded into allocated
- cache portion, and exposed as a character device.
-2) During the second stage a user-space application maps (mmap()) the
- pseudo-locked memory into its address space.
-
-Cache Pseudo-Locking Interface
-------------------------------
-A pseudo-locked region is created using the resctrl interface as follows:
-
-1) Create a new resource group by creating a new directory in /sys/fs/resctrl.
-2) Change the new resource group's mode to "pseudo-locksetup" by writing
- "pseudo-locksetup" to the "mode" file.
-3) Write the schemata of the pseudo-locked region to the "schemata" file. All
- bits within the schemata should be "unused" according to the "bit_usage"
- file.
-
-On successful pseudo-locked region creation the "mode" file will contain
-"pseudo-locked" and a new character device with the same name as the resource
-group will exist in /dev/pseudo_lock. This character device can be mmap()'ed
-by user space in order to obtain access to the pseudo-locked memory region.
-
-An example of cache pseudo-locked region creation and usage can be found below.
-
-Cache Pseudo-Locking Debugging Interface
-----------------------------------------
-The pseudo-locking debugging interface is enabled by default (if
-CONFIG_DEBUG_FS is enabled) and can be found in /sys/kernel/debug/resctrl.
-
-There is no explicit way for the kernel to test if a provided memory
-location is present in the cache. The pseudo-locking debugging interface uses
-the tracing infrastructure to provide two ways to measure cache residency of
-the pseudo-locked region:
-
-1) Memory access latency using the pseudo_lock_mem_latency tracepoint. Data
- from these measurements are best visualized using a hist trigger (see
- example below). In this test the pseudo-locked region is traversed at
- a stride of 32 bytes while hardware prefetchers and preemption
- are disabled. This also provides a substitute visualization of cache
- hits and misses.
-2) Cache hit and miss measurements using model specific precision counters if
- available. Depending on the levels of cache on the system the pseudo_lock_l2
- and pseudo_lock_l3 tracepoints are available.
-
-When a pseudo-locked region is created a new debugfs directory is created for
-it in debugfs as /sys/kernel/debug/resctrl/<newdir>. A single
-write-only file, pseudo_lock_measure, is present in this directory. The
-measurement of the pseudo-locked region depends on the number written to this
-debugfs file:
-
-1:
- writing "1" to the pseudo_lock_measure file will trigger the latency
- measurement captured in the pseudo_lock_mem_latency tracepoint. See
- example below.
-2:
- writing "2" to the pseudo_lock_measure file will trigger the L2 cache
- residency (cache hits and misses) measurement captured in the
- pseudo_lock_l2 tracepoint. See example below.
-3:
- writing "3" to the pseudo_lock_measure file will trigger the L3 cache
- residency (cache hits and misses) measurement captured in the
- pseudo_lock_l3 tracepoint.
-
-All measurements are recorded with the tracing infrastructure. This requires
-the relevant tracepoints to be enabled before the measurement is triggered.
-
-Example of latency debugging interface
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-In this example a pseudo-locked region named "newlock" was created. Here is
-how we can measure the latency in cycles of reading from this region and
-visualize this data with a histogram that is available if CONFIG_HIST_TRIGGERS
-is set::
-
- # :> /sys/kernel/tracing/trace
- # echo 'hist:keys=latency' > /sys/kernel/tracing/events/resctrl/pseudo_lock_mem_latency/trigger
- # echo 1 > /sys/kernel/tracing/events/resctrl/pseudo_lock_mem_latency/enable
- # echo 1 > /sys/kernel/debug/resctrl/newlock/pseudo_lock_measure
- # echo 0 > /sys/kernel/tracing/events/resctrl/pseudo_lock_mem_latency/enable
- # cat /sys/kernel/tracing/events/resctrl/pseudo_lock_mem_latency/hist
-
- # event histogram
- #
- # trigger info: hist:keys=latency:vals=hitcount:sort=hitcount:size=2048 [active]
- #
-
- { latency: 456 } hitcount: 1
- { latency: 50 } hitcount: 83
- { latency: 36 } hitcount: 96
- { latency: 44 } hitcount: 174
- { latency: 48 } hitcount: 195
- { latency: 46 } hitcount: 262
- { latency: 42 } hitcount: 693
- { latency: 40 } hitcount: 3204
- { latency: 38 } hitcount: 3484
-
- Totals:
- Hits: 8192
- Entries: 9
- Dropped: 0
-
-Example of cache hits/misses debugging
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-In this example a pseudo-locked region named "newlock" was created on the L2
-cache of a platform. Here is how we can obtain details of the cache hits
-and misses using the platform's precision counters.
-::
-
- # :> /sys/kernel/tracing/trace
- # echo 1 > /sys/kernel/tracing/events/resctrl/pseudo_lock_l2/enable
- # echo 2 > /sys/kernel/debug/resctrl/newlock/pseudo_lock_measure
- # echo 0 > /sys/kernel/tracing/events/resctrl/pseudo_lock_l2/enable
- # cat /sys/kernel/tracing/trace
-
- # tracer: nop
- #
- # _-----=> irqs-off
- # / _----=> need-resched
- # | / _---=> hardirq/softirq
- # || / _--=> preempt-depth
- # ||| / delay
- # TASK-PID CPU# |||| TIMESTAMP FUNCTION
- # | | | |||| | |
- pseudo_lock_mea-1672 [002] .... 3132.860500: pseudo_lock_l2: hits=4097 miss=0
-
-
-Examples for RDT allocation usage
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-1) Example 1
-
-On a two socket machine (one L3 cache per socket) with just four bits
-for cache bit masks, minimum b/w of 10% with a memory bandwidth
-granularity of 10%.
-::
-
- # mount -t resctrl resctrl /sys/fs/resctrl
- # cd /sys/fs/resctrl
- # mkdir p0 p1
- # echo "L3:0=3;1=c\nMB:0=50;1=50" > /sys/fs/resctrl/p0/schemata
- # echo "L3:0=3;1=3\nMB:0=50;1=50" > /sys/fs/resctrl/p1/schemata
-
-The default resource group is unmodified, so we have access to all parts
-of all caches (its schemata file reads "L3:0=f;1=f").
-
-Tasks that are under the control of group "p0" may only allocate from the
-"lower" 50% on cache ID 0, and the "upper" 50% of cache ID 1.
-Tasks in group "p1" use the "lower" 50% of cache on both sockets.
-
-Similarly, tasks that are under the control of group "p0" may use a
-maximum memory b/w of 50% on socket0 and 50% on socket 1.
-Tasks in group "p1" may also use 50% memory b/w on both sockets.
-Note that unlike cache masks, memory b/w cannot specify whether these
-allocations can overlap or not. The allocations specifies the maximum
-b/w that the group may be able to use and the system admin can configure
-the b/w accordingly.
-
-If resctrl is using the software controller (mba_sc) then user can enter the
-max b/w in MB rather than the percentage values.
-::
-
- # echo "L3:0=3;1=c\nMB:0=1024;1=500" > /sys/fs/resctrl/p0/schemata
- # echo "L3:0=3;1=3\nMB:0=1024;1=500" > /sys/fs/resctrl/p1/schemata
-
-In the above example the tasks in "p1" and "p0" on socket 0 would use a max b/w
-of 1024MB where as on socket 1 they would use 500MB.
-
-2) Example 2
-
-Again two sockets, but this time with a more realistic 20-bit mask.
-
-Two real time tasks pid=1234 running on processor 0 and pid=5678 running on
-processor 1 on socket 0 on a 2-socket and dual core machine. To avoid noisy
-neighbors, each of the two real-time tasks exclusively occupies one quarter
-of L3 cache on socket 0.
-::
-
- # mount -t resctrl resctrl /sys/fs/resctrl
- # cd /sys/fs/resctrl
-
-First we reset the schemata for the default group so that the "upper"
-50% of the L3 cache on socket 0 and 50% of memory b/w cannot be used by
-ordinary tasks::
-
- # echo "L3:0=3ff;1=fffff\nMB:0=50;1=100" > schemata
-
-Next we make a resource group for our first real time task and give
-it access to the "top" 25% of the cache on socket 0.
-::
-
- # mkdir p0
- # echo "L3:0=f8000;1=fffff" > p0/schemata
-
-Finally we move our first real time task into this resource group. We
-also use taskset(1) to ensure the task always runs on a dedicated CPU
-on socket 0. Most uses of resource groups will also constrain which
-processors tasks run on.
-::
-
- # echo 1234 > p0/tasks
- # taskset -cp 1 1234
-
-Ditto for the second real time task (with the remaining 25% of cache)::
-
- # mkdir p1
- # echo "L3:0=7c00;1=fffff" > p1/schemata
- # echo 5678 > p1/tasks
- # taskset -cp 2 5678
-
-For the same 2 socket system with memory b/w resource and CAT L3 the
-schemata would look like(Assume min_bandwidth 10 and bandwidth_gran is
-10):
-
-For our first real time task this would request 20% memory b/w on socket 0.
-::
-
- # echo -e "L3:0=f8000;1=fffff\nMB:0=20;1=100" > p0/schemata
-
-For our second real time task this would request an other 20% memory b/w
-on socket 0.
-::
-
- # echo -e "L3:0=f8000;1=fffff\nMB:0=20;1=100" > p0/schemata
-
-3) Example 3
-
-A single socket system which has real-time tasks running on core 4-7 and
-non real-time workload assigned to core 0-3. The real-time tasks share text
-and data, so a per task association is not required and due to interaction
-with the kernel it's desired that the kernel on these cores shares L3 with
-the tasks.
-::
-
- # mount -t resctrl resctrl /sys/fs/resctrl
- # cd /sys/fs/resctrl
-
-First we reset the schemata for the default group so that the "upper"
-50% of the L3 cache on socket 0, and 50% of memory bandwidth on socket 0
-cannot be used by ordinary tasks::
-
- # echo "L3:0=3ff\nMB:0=50" > schemata
-
-Next we make a resource group for our real time cores and give it access
-to the "top" 50% of the cache on socket 0 and 50% of memory bandwidth on
-socket 0.
-::
-
- # mkdir p0
- # echo "L3:0=ffc00\nMB:0=50" > p0/schemata
-
-Finally we move core 4-7 over to the new group and make sure that the
-kernel and the tasks running there get 50% of the cache. They should
-also get 50% of memory bandwidth assuming that the cores 4-7 are SMT
-siblings and only the real time threads are scheduled on the cores 4-7.
-::
-
- # echo F0 > p0/cpus
-
-4) Example 4
-
-The resource groups in previous examples were all in the default "shareable"
-mode allowing sharing of their cache allocations. If one resource group
-configures a cache allocation then nothing prevents another resource group
-to overlap with that allocation.
-
-In this example a new exclusive resource group will be created on a L2 CAT
-system with two L2 cache instances that can be configured with an 8-bit
-capacity bitmask. The new exclusive resource group will be configured to use
-25% of each cache instance.
-::
-
- # mount -t resctrl resctrl /sys/fs/resctrl/
- # cd /sys/fs/resctrl
-
-First, we observe that the default group is configured to allocate to all L2
-cache::
-
- # cat schemata
- L2:0=ff;1=ff
-
-We could attempt to create the new resource group at this point, but it will
-fail because of the overlap with the schemata of the default group::
-
- # mkdir p0
- # echo 'L2:0=0x3;1=0x3' > p0/schemata
- # cat p0/mode
- shareable
- # echo exclusive > p0/mode
- -sh: echo: write error: Invalid argument
- # cat info/last_cmd_status
- schemata overlaps
-
-To ensure that there is no overlap with another resource group the default
-resource group's schemata has to change, making it possible for the new
-resource group to become exclusive.
-::
-
- # echo 'L2:0=0xfc;1=0xfc' > schemata
- # echo exclusive > p0/mode
- # grep . p0/*
- p0/cpus:0
- p0/mode:exclusive
- p0/schemata:L2:0=03;1=03
- p0/size:L2:0=262144;1=262144
-
-A new resource group will on creation not overlap with an exclusive resource
-group::
-
- # mkdir p1
- # grep . p1/*
- p1/cpus:0
- p1/mode:shareable
- p1/schemata:L2:0=fc;1=fc
- p1/size:L2:0=786432;1=786432
-
-The bit_usage will reflect how the cache is used::
-
- # cat info/L2/bit_usage
- 0=SSSSSSEE;1=SSSSSSEE
-
-A resource group cannot be forced to overlap with an exclusive resource group::
-
- # echo 'L2:0=0x1;1=0x1' > p1/schemata
- -sh: echo: write error: Invalid argument
- # cat info/last_cmd_status
- overlaps with exclusive group
-
-Example of Cache Pseudo-Locking
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Lock portion of L2 cache from cache id 1 using CBM 0x3. Pseudo-locked
-region is exposed at /dev/pseudo_lock/newlock that can be provided to
-application for argument to mmap().
-::
-
- # mount -t resctrl resctrl /sys/fs/resctrl/
- # cd /sys/fs/resctrl
-
-Ensure that there are bits available that can be pseudo-locked, since only
-unused bits can be pseudo-locked the bits to be pseudo-locked needs to be
-removed from the default resource group's schemata::
-
- # cat info/L2/bit_usage
- 0=SSSSSSSS;1=SSSSSSSS
- # echo 'L2:1=0xfc' > schemata
- # cat info/L2/bit_usage
- 0=SSSSSSSS;1=SSSSSS00
-
-Create a new resource group that will be associated with the pseudo-locked
-region, indicate that it will be used for a pseudo-locked region, and
-configure the requested pseudo-locked region capacity bitmask::
-
- # mkdir newlock
- # echo pseudo-locksetup > newlock/mode
- # echo 'L2:1=0x3' > newlock/schemata
-
-On success the resource group's mode will change to pseudo-locked, the
-bit_usage will reflect the pseudo-locked region, and the character device
-exposing the pseudo-locked region will exist::
-
- # cat newlock/mode
- pseudo-locked
- # cat info/L2/bit_usage
- 0=SSSSSSSS;1=SSSSSSPP
- # ls -l /dev/pseudo_lock/newlock
- crw------- 1 root root 243, 0 Apr 3 05:01 /dev/pseudo_lock/newlock
-
-::
-
- /*
- * Example code to access one page of pseudo-locked cache region
- * from user space.
- */
- #define _GNU_SOURCE
- #include <fcntl.h>
- #include <sched.h>
- #include <stdio.h>
- #include <stdlib.h>
- #include <unistd.h>
- #include <sys/mman.h>
-
- /*
- * It is required that the application runs with affinity to only
- * cores associated with the pseudo-locked region. Here the cpu
- * is hardcoded for convenience of example.
- */
- static int cpuid = 2;
-
- int main(int argc, char *argv[])
- {
- cpu_set_t cpuset;
- long page_size;
- void *mapping;
- int dev_fd;
- int ret;
-
- page_size = sysconf(_SC_PAGESIZE);
-
- CPU_ZERO(&cpuset);
- CPU_SET(cpuid, &cpuset);
- ret = sched_setaffinity(0, sizeof(cpuset), &cpuset);
- if (ret < 0) {
- perror("sched_setaffinity");
- exit(EXIT_FAILURE);
- }
-
- dev_fd = open("/dev/pseudo_lock/newlock", O_RDWR);
- if (dev_fd < 0) {
- perror("open");
- exit(EXIT_FAILURE);
- }
-
- mapping = mmap(0, page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
- dev_fd, 0);
- if (mapping == MAP_FAILED) {
- perror("mmap");
- close(dev_fd);
- exit(EXIT_FAILURE);
- }
-
- /* Application interacts with pseudo-locked memory @mapping */
-
- ret = munmap(mapping, page_size);
- if (ret < 0) {
- perror("munmap");
- close(dev_fd);
- exit(EXIT_FAILURE);
- }
-
- close(dev_fd);
- exit(EXIT_SUCCESS);
- }
-
-Locking between applications
-----------------------------
-
-Certain operations on the resctrl filesystem, composed of read/writes
-to/from multiple files, must be atomic.
-
-As an example, the allocation of an exclusive reservation of L3 cache
-involves:
-
- 1. Read the cbmmasks from each directory or the per-resource "bit_usage"
- 2. Find a contiguous set of bits in the global CBM bitmask that is clear
- in any of the directory cbmmasks
- 3. Create a new directory
- 4. Set the bits found in step 2 to the new directory "schemata" file
-
-If two applications attempt to allocate space concurrently then they can
-end up allocating the same bits so the reservations are shared instead of
-exclusive.
-
-To coordinate atomic operations on the resctrlfs and to avoid the problem
-above, the following locking procedure is recommended:
-
-Locking is based on flock, which is available in libc and also as a shell
-script command
-
-Write lock:
-
- A) Take flock(LOCK_EX) on /sys/fs/resctrl
- B) Read/write the directory structure.
- C) funlock
-
-Read lock:
-
- A) Take flock(LOCK_SH) on /sys/fs/resctrl
- B) If success read the directory structure.
- C) funlock
-
-Example with bash::
-
- # Atomically read directory structure
- $ flock -s /sys/fs/resctrl/ find /sys/fs/resctrl
-
- # Read directory contents and create new subdirectory
-
- $ cat create-dir.sh
- find /sys/fs/resctrl/ > output.txt
- mask = function-of(output.txt)
- mkdir /sys/fs/resctrl/newres/
- echo mask > /sys/fs/resctrl/newres/schemata
-
- $ flock /sys/fs/resctrl/ ./create-dir.sh
-
-Example with C::
-
- /*
- * Example code do take advisory locks
- * before accessing resctrl filesystem
- */
- #include <sys/file.h>
- #include <stdlib.h>
-
- void resctrl_take_shared_lock(int fd)
- {
- int ret;
-
- /* take shared lock on resctrl filesystem */
- ret = flock(fd, LOCK_SH);
- if (ret) {
- perror("flock");
- exit(-1);
- }
- }
-
- void resctrl_take_exclusive_lock(int fd)
- {
- int ret;
-
- /* release lock on resctrl filesystem */
- ret = flock(fd, LOCK_EX);
- if (ret) {
- perror("flock");
- exit(-1);
- }
- }
-
- void resctrl_release_lock(int fd)
- {
- int ret;
-
- /* take shared lock on resctrl filesystem */
- ret = flock(fd, LOCK_UN);
- if (ret) {
- perror("flock");
- exit(-1);
- }
- }
-
- void main(void)
- {
- int fd, ret;
-
- fd = open("/sys/fs/resctrl", O_DIRECTORY);
- if (fd == -1) {
- perror("open");
- exit(-1);
- }
- resctrl_take_shared_lock(fd);
- /* code to read directory contents */
- resctrl_release_lock(fd);
-
- resctrl_take_exclusive_lock(fd);
- /* code to read and write directory contents */
- resctrl_release_lock(fd);
- }
-
-Examples for RDT Monitoring along with allocation usage
-=======================================================
-Reading monitored data
-----------------------
-Reading an event file (for ex: mon_data/mon_L3_00/llc_occupancy) would
-show the current snapshot of LLC occupancy of the corresponding MON
-group or CTRL_MON group.
-
-
-Example 1 (Monitor CTRL_MON group and subset of tasks in CTRL_MON group)
-------------------------------------------------------------------------
-On a two socket machine (one L3 cache per socket) with just four bits
-for cache bit masks::
-
- # mount -t resctrl resctrl /sys/fs/resctrl
- # cd /sys/fs/resctrl
- # mkdir p0 p1
- # echo "L3:0=3;1=c" > /sys/fs/resctrl/p0/schemata
- # echo "L3:0=3;1=3" > /sys/fs/resctrl/p1/schemata
- # echo 5678 > p1/tasks
- # echo 5679 > p1/tasks
-
-The default resource group is unmodified, so we have access to all parts
-of all caches (its schemata file reads "L3:0=f;1=f").
-
-Tasks that are under the control of group "p0" may only allocate from the
-"lower" 50% on cache ID 0, and the "upper" 50% of cache ID 1.
-Tasks in group "p1" use the "lower" 50% of cache on both sockets.
-
-Create monitor groups and assign a subset of tasks to each monitor group.
-::
-
- # cd /sys/fs/resctrl/p1/mon_groups
- # mkdir m11 m12
- # echo 5678 > m11/tasks
- # echo 5679 > m12/tasks
-
-fetch data (data shown in bytes)
-::
-
- # cat m11/mon_data/mon_L3_00/llc_occupancy
- 16234000
- # cat m11/mon_data/mon_L3_01/llc_occupancy
- 14789000
- # cat m12/mon_data/mon_L3_00/llc_occupancy
- 16789000
-
-The parent ctrl_mon group shows the aggregated data.
-::
-
- # cat /sys/fs/resctrl/p1/mon_data/mon_l3_00/llc_occupancy
- 31234000
-
-Example 2 (Monitor a task from its creation)
---------------------------------------------
-On a two socket machine (one L3 cache per socket)::
-
- # mount -t resctrl resctrl /sys/fs/resctrl
- # cd /sys/fs/resctrl
- # mkdir p0 p1
-
-An RMID is allocated to the group once its created and hence the <cmd>
-below is monitored from its creation.
-::
-
- # echo $$ > /sys/fs/resctrl/p1/tasks
- # <cmd>
-
-Fetch the data::
-
- # cat /sys/fs/resctrl/p1/mon_data/mon_l3_00/llc_occupancy
- 31789000
-
-Example 3 (Monitor without CAT support or before creating CAT groups)
----------------------------------------------------------------------
-
-Assume a system like HSW has only CQM and no CAT support. In this case
-the resctrl will still mount but cannot create CTRL_MON directories.
-But user can create different MON groups within the root group thereby
-able to monitor all tasks including kernel threads.
-
-This can also be used to profile jobs cache size footprint before being
-able to allocate them to different allocation groups.
-::
-
- # mount -t resctrl resctrl /sys/fs/resctrl
- # cd /sys/fs/resctrl
- # mkdir mon_groups/m01
- # mkdir mon_groups/m02
-
- # echo 3478 > /sys/fs/resctrl/mon_groups/m01/tasks
- # echo 2467 > /sys/fs/resctrl/mon_groups/m02/tasks
-
-Monitor the groups separately and also get per domain data. From the
-below its apparent that the tasks are mostly doing work on
-domain(socket) 0.
-::
-
- # cat /sys/fs/resctrl/mon_groups/m01/mon_L3_00/llc_occupancy
- 31234000
- # cat /sys/fs/resctrl/mon_groups/m01/mon_L3_01/llc_occupancy
- 34555
- # cat /sys/fs/resctrl/mon_groups/m02/mon_L3_00/llc_occupancy
- 31234000
- # cat /sys/fs/resctrl/mon_groups/m02/mon_L3_01/llc_occupancy
- 32789
-
-
-Example 4 (Monitor real time tasks)
------------------------------------
-
-A single socket system which has real time tasks running on cores 4-7
-and non real time tasks on other cpus. We want to monitor the cache
-occupancy of the real time threads on these cores.
-::
-
- # mount -t resctrl resctrl /sys/fs/resctrl
- # cd /sys/fs/resctrl
- # mkdir p1
-
-Move the cpus 4-7 over to p1::
-
- # echo f0 > p1/cpus
-
-View the llc occupancy snapshot::
-
- # cat /sys/fs/resctrl/p1/mon_data/mon_L3_00/llc_occupancy
- 11234000
-
-Intel RDT Errata
-================
-
-Intel MBM Counters May Report System Memory Bandwidth Incorrectly
------------------------------------------------------------------
-
-Errata SKX99 for Skylake server and BDF102 for Broadwell server.
-
-Problem: Intel Memory Bandwidth Monitoring (MBM) counters track metrics
-according to the assigned Resource Monitor ID (RMID) for that logical
-core. The IA32_QM_CTR register (MSR 0xC8E), used to report these
-metrics, may report incorrect system bandwidth for certain RMID values.
-
-Implication: Due to the errata, system memory bandwidth may not match
-what is reported.
-
-Workaround: MBM total and local readings are corrected according to the
-following correction factor table:
-
-+---------------+---------------+---------------+-----------------+
-|core count |rmid count |rmid threshold |correction factor|
-+---------------+---------------+---------------+-----------------+
-|1 |8 |0 |1.000000 |
-+---------------+---------------+---------------+-----------------+
-|2 |16 |0 |1.000000 |
-+---------------+---------------+---------------+-----------------+
-|3 |24 |15 |0.969650 |
-+---------------+---------------+---------------+-----------------+
-|4 |32 |0 |1.000000 |
-+---------------+---------------+---------------+-----------------+
-|6 |48 |31 |0.969650 |
-+---------------+---------------+---------------+-----------------+
-|7 |56 |47 |1.142857 |
-+---------------+---------------+---------------+-----------------+
-|8 |64 |0 |1.000000 |
-+---------------+---------------+---------------+-----------------+
-|9 |72 |63 |1.185115 |
-+---------------+---------------+---------------+-----------------+
-|10 |80 |63 |1.066553 |
-+---------------+---------------+---------------+-----------------+
-|11 |88 |79 |1.454545 |
-+---------------+---------------+---------------+-----------------+
-|12 |96 |0 |1.000000 |
-+---------------+---------------+---------------+-----------------+
-|13 |104 |95 |1.230769 |
-+---------------+---------------+---------------+-----------------+
-|14 |112 |95 |1.142857 |
-+---------------+---------------+---------------+-----------------+
-|15 |120 |95 |1.066667 |
-+---------------+---------------+---------------+-----------------+
-|16 |128 |0 |1.000000 |
-+---------------+---------------+---------------+-----------------+
-|17 |136 |127 |1.254863 |
-+---------------+---------------+---------------+-----------------+
-|18 |144 |127 |1.185255 |
-+---------------+---------------+---------------+-----------------+
-|19 |152 |0 |1.000000 |
-+---------------+---------------+---------------+-----------------+
-|20 |160 |127 |1.066667 |
-+---------------+---------------+---------------+-----------------+
-|21 |168 |0 |1.000000 |
-+---------------+---------------+---------------+-----------------+
-|22 |176 |159 |1.454334 |
-+---------------+---------------+---------------+-----------------+
-|23 |184 |0 |1.000000 |
-+---------------+---------------+---------------+-----------------+
-|24 |192 |127 |0.969744 |
-+---------------+---------------+---------------+-----------------+
-|25 |200 |191 |1.280246 |
-+---------------+---------------+---------------+-----------------+
-|26 |208 |191 |1.230921 |
-+---------------+---------------+---------------+-----------------+
-|27 |216 |0 |1.000000 |
-+---------------+---------------+---------------+-----------------+
-|28 |224 |191 |1.143118 |
-+---------------+---------------+---------------+-----------------+
-
-If rmid > rmid threshold, MBM total and local values should be multiplied
-by the correction factor.
-
-See:
-
-1. Erratum SKX99 in Intel Xeon Processor Scalable Family Specification Update:
-http://web.archive.org/web/20200716124958/https://www.intel.com/content/www/us/en/processors/xeon/scalable/xeon-scalable-spec-update.html
-
-2. Erratum BDF102 in Intel Xeon E5-2600 v4 Processor Product Family Specification Update:
-http://web.archive.org/web/20191125200531/https://www.intel.com/content/dam/www/public/us/en/documents/specification-updates/xeon-e5-v4-spec-update.pdf
-
-3. The errata in Intel Resource Director Technology (Intel RDT) on 2nd Generation Intel Xeon Scalable Processors Reference Manual:
-https://software.intel.com/content/www/us/en/develop/articles/intel-resource-director-technology-rdt-reference-manual.html
-
-for further information.
diff --git a/Documentation/arch/x86/resume.svg b/Documentation/arch/x86/resume.svg
new file mode 100644
index 000000000000..ad8839f762bf
--- /dev/null
+++ b/Documentation/arch/x86/resume.svg
@@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Do not edit this file with editors other than draw.io -->
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" width="582px" height="1152px" viewBox="-0.5 -0.5 582 1152" content="&lt;mxfile host=&quot;confluence.amd.com&quot; agent=&quot;Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36 Edg/134.0.0.0&quot; version=&quot;24.7.10&quot; scale=&quot;1&quot; border=&quot;0&quot;&gt;&#10; &lt;diagram id=&quot;lFF5s3GfZ4Py3fAf5dUH&quot; name=&quot;Page-1&quot;&gt;&#10; &lt;mxGraphModel dx=&quot;2364&quot; dy=&quot;1473&quot; grid=&quot;1&quot; gridSize=&quot;10&quot; guides=&quot;1&quot; tooltips=&quot;1&quot; connect=&quot;1&quot; arrows=&quot;1&quot; fold=&quot;1&quot; page=&quot;0&quot; pageScale=&quot;1&quot; pageWidth=&quot;850&quot; pageHeight=&quot;1100&quot; math=&quot;0&quot; shadow=&quot;0&quot;&gt;&#10; &lt;root&gt;&#10; &lt;mxCell id=&quot;0&quot; /&gt;&#10; &lt;mxCell id=&quot;1&quot; parent=&quot;0&quot; /&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-10&quot; value=&quot;&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;jeVlbFHk8Qahp5zcIn_D-1&quot; target=&quot;jeVlbFHk8Qahp5zcIn_D-9&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-1&quot; value=&quot;Wakeup event occurs&quot; style=&quot;strokeWidth=2;html=1;shape=mxgraph.flowchart.start_2;whiteSpace=wrap;labelBackgroundColor=none;fillColor=#0CF232;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;-240&quot; y=&quot;-190&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-8&quot; value=&quot;&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;jeVlbFHk8Qahp5zcIn_D-3&quot; target=&quot;jeVlbFHk8Qahp5zcIn_D-4&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-56&quot; value=&quot;&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;jeVlbFHk8Qahp5zcIn_D-3&quot; target=&quot;jeVlbFHk8Qahp5zcIn_D-13&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-3&quot; value=&quot;MP1 hands off control to OS&quot; style=&quot;rounded=1;whiteSpace=wrap;html=1;absoluteArcSize=1;arcSize=14;strokeWidth=2;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;80&quot; y=&quot;-190&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-4&quot; value=&quot;&amp;lt;span style=&amp;quot;background-color: rgb(232, 205, 151);&amp;quot;&amp;gt;&amp;lt;span style=&amp;quot;color: rgb(0, 0, 0); font-family: Helvetica; font-size: 12px; font-style: normal; font-variant-ligatures: normal; font-variant-caps: normal; font-weight: 400; letter-spacing: normal; orphans: 2; text-align: center; text-indent: 0px; text-transform: none; widows: 2; word-spacing: 0px; -webkit-text-stroke-width: 0px; white-space: normal; text-decoration-thickness: initial; text-decoration-style: initial; text-decoration-color: initial; float: none; display: inline !important;&amp;quot;&amp;gt;OS Moves one core out of ACPI C3&amp;lt;/span&amp;gt;&amp;lt;br&amp;gt;&amp;lt;/span&amp;gt;&quot; style=&quot;verticalLabelPosition=middle;verticalAlign=middle;html=1;shape=process;whiteSpace=wrap;rounded=1;size=0.14;arcSize=6;labelPosition=center;align=center;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;240&quot; y=&quot;-170&quot; width=&quot;100&quot; height=&quot;60&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-11&quot; value=&quot;&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;jeVlbFHk8Qahp5zcIn_D-9&quot; target=&quot;jeVlbFHk8Qahp5zcIn_D-3&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-9&quot; value=&quot;MP0/MP1 boot process&quot; style=&quot;rounded=1;whiteSpace=wrap;html=1;absoluteArcSize=1;arcSize=14;strokeWidth=2;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;-80&quot; y=&quot;-190&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-27&quot; value=&quot;&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;jeVlbFHk8Qahp5zcIn_D-13&quot; target=&quot;jeVlbFHk8Qahp5zcIn_D-16&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-13&quot; value=&quot;OS checks all wake sources&quot; style=&quot;rounded=1;whiteSpace=wrap;html=1;absoluteArcSize=1;arcSize=14;strokeWidth=2;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;80&quot; y=&quot;-40&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-29&quot; value=&quot;&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;jeVlbFHk8Qahp5zcIn_D-15&quot; target=&quot;jeVlbFHk8Qahp5zcIn_D-17&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-44&quot; value=&quot;no&quot; style=&quot;edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];fontStyle=1;labelBackgroundColor=none;fontColor=#393C56;&quot; vertex=&quot;1&quot; connectable=&quot;0&quot; parent=&quot;jeVlbFHk8Qahp5zcIn_D-29&quot;&gt;&#10; &lt;mxGeometry x=&quot;-0.28&quot; y=&quot;2&quot; relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;8&quot; y=&quot;-8&quot; as=&quot;offset&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-34&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.5;entryY=1;entryDx=0;entryDy=0;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;jeVlbFHk8Qahp5zcIn_D-15&quot; target=&quot;jeVlbFHk8Qahp5zcIn_D-25&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-41&quot; value=&quot;yes&quot; style=&quot;edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];fontStyle=1;labelBackgroundColor=none;fontColor=#393C56;&quot; vertex=&quot;1&quot; connectable=&quot;0&quot; parent=&quot;jeVlbFHk8Qahp5zcIn_D-34&quot;&gt;&#10; &lt;mxGeometry x=&quot;-0.1165&quot; y=&quot;-1&quot; relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;153&quot; y=&quot;11&quot; as=&quot;offset&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-15&quot; value=&quot;ACPI fixed &amp;lt;br&amp;gt;event active&quot; style=&quot;strokeWidth=2;html=1;shape=mxgraph.flowchart.decision;whiteSpace=wrap;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;80&quot; y=&quot;260&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-28&quot; value=&quot;&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;jeVlbFHk8Qahp5zcIn_D-16&quot; target=&quot;jeVlbFHk8Qahp5zcIn_D-15&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-43&quot; value=&quot;no&quot; style=&quot;edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];fontStyle=1;labelBackgroundColor=none;fontColor=#393C56;&quot; vertex=&quot;1&quot; connectable=&quot;0&quot; parent=&quot;jeVlbFHk8Qahp5zcIn_D-28&quot;&gt;&#10; &lt;mxGeometry y=&quot;2&quot; relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;8&quot; y=&quot;-15&quot; as=&quot;offset&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-57&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=1;entryY=0.5;entryDx=0;entryDy=0;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;jeVlbFHk8Qahp5zcIn_D-16&quot; target=&quot;jeVlbFHk8Qahp5zcIn_D-25&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-58&quot; value=&quot;yes&quot; style=&quot;edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];fontStyle=1;labelBackgroundColor=none;fontColor=#393C56;&quot; vertex=&quot;1&quot; connectable=&quot;0&quot; parent=&quot;jeVlbFHk8Qahp5zcIn_D-57&quot;&gt;&#10; &lt;mxGeometry x=&quot;0.0145&quot; y=&quot;1&quot; relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;102&quot; y=&quot;9&quot; as=&quot;offset&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-16&quot; value=&quot;IRQ other &amp;lt;br&amp;gt;than ACPI SCI active&quot; style=&quot;strokeWidth=2;html=1;shape=mxgraph.flowchart.decision;whiteSpace=wrap;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;80&quot; y=&quot;110&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-30&quot; value=&quot;&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;jeVlbFHk8Qahp5zcIn_D-17&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;130&quot; y=&quot;560&quot; as=&quot;targetPoint&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-65&quot; value=&quot;no&quot; style=&quot;edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];fontStyle=1;labelBackgroundColor=none;fontColor=#393C56;&quot; vertex=&quot;1&quot; connectable=&quot;0&quot; parent=&quot;jeVlbFHk8Qahp5zcIn_D-30&quot;&gt;&#10; &lt;mxGeometry x=&quot;-0.4694&quot; y=&quot;1&quot; relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;9&quot; as=&quot;offset&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-36&quot; value=&quot;&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;jeVlbFHk8Qahp5zcIn_D-17&quot; target=&quot;jeVlbFHk8Qahp5zcIn_D-35&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-45&quot; value=&quot;yes&quot; style=&quot;edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];fontStyle=1;labelBackgroundColor=none;fontColor=#393C56;&quot; vertex=&quot;1&quot; connectable=&quot;0&quot; parent=&quot;jeVlbFHk8Qahp5zcIn_D-36&quot;&gt;&#10; &lt;mxGeometry x=&quot;-0.2867&quot; y=&quot;2&quot; relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;5&quot; y=&quot;8&quot; as=&quot;offset&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-17&quot; value=&quot;GPIO&amp;lt;br&amp;gt;IRQ shared&amp;lt;br&amp;gt;with SCI&quot; style=&quot;strokeWidth=2;html=1;shape=mxgraph.flowchart.decision;whiteSpace=wrap;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;80&quot; y=&quot;410&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-31&quot; value=&quot;&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;130&quot; y=&quot;660&quot; as=&quot;sourcePoint&quot; /&gt;&#10; &lt;mxPoint x=&quot;130&quot; y=&quot;710&quot; as=&quot;targetPoint&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-32&quot; value=&quot;&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; target=&quot;jeVlbFHk8Qahp5zcIn_D-23&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;130&quot; y=&quot;810&quot; as=&quot;sourcePoint&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-48&quot; value=&quot;no&quot; style=&quot;edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];fontStyle=1;labelBackgroundColor=none;fontColor=#393C56;&quot; vertex=&quot;1&quot; connectable=&quot;0&quot; parent=&quot;jeVlbFHk8Qahp5zcIn_D-32&quot;&gt;&#10; &lt;mxGeometry x=&quot;-0.1714&quot; y=&quot;-4&quot; relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;14&quot; as=&quot;offset&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-52&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.5;entryY=1;entryDx=0;entryDy=0;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;jeVlbFHk8Qahp5zcIn_D-23&quot; target=&quot;jeVlbFHk8Qahp5zcIn_D-25&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-53&quot; value=&quot;yes&quot; style=&quot;edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];fontStyle=1;labelBackgroundColor=none;fontColor=#393C56;&quot; vertex=&quot;1&quot; connectable=&quot;0&quot; parent=&quot;jeVlbFHk8Qahp5zcIn_D-52&quot;&gt;&#10; &lt;mxGeometry x=&quot;-0.5259&quot; y=&quot;-5&quot; relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;220&quot; y=&quot;15&quot; as=&quot;offset&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-62&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.5;entryY=1;entryDx=0;entryDy=0;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;jeVlbFHk8Qahp5zcIn_D-23&quot; target=&quot;jeVlbFHk8Qahp5zcIn_D-54&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-64&quot; value=&quot;no&quot; style=&quot;edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];fontStyle=1;labelBackgroundColor=none;fontColor=#393C56;&quot; vertex=&quot;1&quot; connectable=&quot;0&quot; parent=&quot;jeVlbFHk8Qahp5zcIn_D-62&quot;&gt;&#10; &lt;mxGeometry x=&quot;-0.7472&quot; y=&quot;3&quot; relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;-92&quot; y=&quot;13&quot; as=&quot;offset&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-23&quot; value=&quot;Any PM&amp;lt;br&amp;gt;wakeup event&amp;lt;br&amp;gt;pending&quot; style=&quot;strokeWidth=2;html=1;shape=mxgraph.flowchart.decision;whiteSpace=wrap;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;80&quot; y=&quot;860&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-24&quot; value=&quot;Kernel resumes system&quot; style=&quot;strokeWidth=2;html=1;shape=mxgraph.flowchart.terminator;whiteSpace=wrap;labelBackgroundColor=none;fillColor=#0CF232;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;-240&quot; y=&quot;-20&quot; width=&quot;100&quot; height=&quot;60&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-26&quot; value=&quot;&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;jeVlbFHk8Qahp5zcIn_D-25&quot; target=&quot;jeVlbFHk8Qahp5zcIn_D-24&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-25&quot; value=&quot;uPEP driver removes OS_HINT&quot; style=&quot;rounded=1;whiteSpace=wrap;html=1;absoluteArcSize=1;arcSize=14;strokeWidth=2;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;-240&quot; y=&quot;110&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-37&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.5;entryY=1;entryDx=0;entryDy=0;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;jeVlbFHk8Qahp5zcIn_D-35&quot; target=&quot;jeVlbFHk8Qahp5zcIn_D-25&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-40&quot; value=&quot;yes&quot; style=&quot;edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];fontStyle=1;labelBackgroundColor=none;fontColor=#393C56;&quot; vertex=&quot;1&quot; connectable=&quot;0&quot; parent=&quot;jeVlbFHk8Qahp5zcIn_D-37&quot;&gt;&#10; &lt;mxGeometry x=&quot;-0.56&quot; y=&quot;-2&quot; relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;67&quot; y=&quot;12&quot; as=&quot;offset&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-60&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=-0.008;entryY=0.422;entryDx=0;entryDy=0;exitX=0.5;exitY=1;exitDx=0;exitDy=0;exitPerimeter=0;entryPerimeter=0;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;jeVlbFHk8Qahp5zcIn_D-35&quot; target=&quot;jeVlbFHk8Qahp5zcIn_D-38&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;-50&quot; y=&quot;535&quot; as=&quot;sourcePoint&quot; /&gt;&#10; &lt;mxPoint x=&quot;20&quot; y=&quot;685&quot; as=&quot;targetPoint&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-61&quot; value=&quot;no&quot; style=&quot;edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];fontStyle=1;labelBackgroundColor=none;fontColor=#393C56;&quot; vertex=&quot;1&quot; connectable=&quot;0&quot; parent=&quot;jeVlbFHk8Qahp5zcIn_D-60&quot;&gt;&#10; &lt;mxGeometry x=&quot;0.1126&quot; y=&quot;-3&quot; relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;-16&quot; y=&quot;-85&quot; as=&quot;offset&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-35&quot; value=&quot;Any GPIO&amp;lt;br&amp;gt;w/ WAKESTS&amp;lt;br&amp;gt;active&quot; style=&quot;strokeWidth=2;html=1;shape=mxgraph.flowchart.decision;whiteSpace=wrap;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;-90&quot; y=&quot;410&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-38&quot; value=&quot;Check for ACPI Notify() events&quot; style=&quot;rounded=1;whiteSpace=wrap;html=1;absoluteArcSize=1;arcSize=14;strokeWidth=2;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;80&quot; y=&quot;560&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-49&quot; value=&quot;&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;jeVlbFHk8Qahp5zcIn_D-39&quot; target=&quot;jeVlbFHk8Qahp5zcIn_D-23&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-50&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.5;entryY=1;entryDx=0;entryDy=0;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;jeVlbFHk8Qahp5zcIn_D-39&quot; target=&quot;jeVlbFHk8Qahp5zcIn_D-25&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-51&quot; value=&quot;yes&quot; style=&quot;edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];fontStyle=1;labelBackgroundColor=none;fontColor=#393C56;&quot; vertex=&quot;1&quot; connectable=&quot;0&quot; parent=&quot;jeVlbFHk8Qahp5zcIn_D-50&quot;&gt;&#10; &lt;mxGeometry x=&quot;-0.4506&quot; y=&quot;-2&quot; relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;215&quot; y=&quot;12&quot; as=&quot;offset&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-39&quot; value=&quot;Any GPE &amp;lt;br&amp;gt;pending&quot; style=&quot;strokeWidth=2;html=1;shape=mxgraph.flowchart.decision;whiteSpace=wrap;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;80&quot; y=&quot;710&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-63&quot; value=&quot;&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;jeVlbFHk8Qahp5zcIn_D-54&quot; target=&quot;jeVlbFHk8Qahp5zcIn_D-55&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-54&quot; value=&quot;OS moves active &amp;lt;br&amp;gt;core back to&amp;lt;br&amp;gt;ACPI C3&quot; style=&quot;rounded=1;whiteSpace=wrap;html=1;absoluteArcSize=1;arcSize=14;strokeWidth=2;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;240&quot; y=&quot;110&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;jeVlbFHk8Qahp5zcIn_D-55&quot; value=&quot;MP1 puts system back to sleep&quot; style=&quot;strokeWidth=2;html=1;shape=mxgraph.flowchart.terminator;whiteSpace=wrap;labelBackgroundColor=none;fillColor=#F27979;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;240&quot; y=&quot;-20&quot; width=&quot;100&quot; height=&quot;60&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;/root&gt;&#10; &lt;/mxGraphModel&gt;&#10; &lt;/diagram&gt;&#10;&lt;/mxfile&gt;&#10;"><defs/><g><g data-cell-id="0"><g data-cell-id="1"><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-10"><g><path d="M 101 51 L 154.63 51" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 159.88 51 L 152.88 54.5 L 154.63 51 L 152.88 47.5 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-1"><g><ellipse cx="51" cy="51" rx="50" ry="50" fill="#0cf232" stroke="#e07a5f" stroke-width="2" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 51px; margin-left: 2px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">Wakeup event occurs</div></div></div></foreignObject><image x="2" y="37" width="98" height="32" xlink:href=""/></switch></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-8"><g><path d="M 421 51 L 474.63 51" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 479.88 51 L 472.88 54.5 L 474.63 51 L 472.88 47.5 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-56"><g><path d="M 371 101 L 371 144.63" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 371 149.88 L 367.5 142.88 L 371 144.63 L 374.5 142.88 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-3"><g><rect x="321" y="1" width="100" height="100" rx="7" ry="7" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 51px; margin-left: 322px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">MP1 hands off control to OS</div></div></div></foreignObject><image x="322" y="37" width="98" height="32" xlink:href=""/></switch></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-4"><g><rect x="481" y="21" width="100" height="60" rx="3.6" ry="3.6" fill="#f2cc8f" stroke="#e07a5f" pointer-events="all"/><path d="M 495 21 L 495 81 M 567 21 L 567 81" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 70px; height: 1px; padding-top: 51px; margin-left: 496px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;"><span style="background-color: rgb(232, 205, 151);"><span style="color: rgb(0, 0, 0); font-family: Helvetica; font-size: 12px; font-style: normal; font-variant-ligatures: normal; font-variant-caps: normal; font-weight: 400; letter-spacing: normal; orphans: 2; text-align: center; text-indent: 0px; text-transform: none; widows: 2; word-spacing: 0px; -webkit-text-stroke-width: 0px; white-space: normal; text-decoration-thickness: initial; text-decoration-style: initial; text-decoration-color: initial; float: none; display: inline !important;">OS Moves one core out of ACPI C3</span><br /></span></div></div></div></foreignObject><image x="496" y="30" width="70" height="46" xlink:href=""/></switch></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-11"><g><path d="M 261 51 L 314.63 51" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 319.88 51 L 312.88 54.5 L 314.63 51 L 312.88 47.5 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-9"><g><rect x="161" y="1" width="100" height="100" rx="7" ry="7" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 51px; margin-left: 162px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">MP0/MP1 boot process</div></div></div></foreignObject><image x="162" y="37" width="98" height="32" xlink:href=""/></switch></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-27"><g><path d="M 371 251 L 371 294.63" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 371 299.88 L 367.5 292.88 L 371 294.63 L 374.5 292.88 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-13"><g><rect x="321" y="151" width="100" height="100" rx="7" ry="7" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 201px; margin-left: 322px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">OS checks all wake sources</div></div></div></foreignObject><image x="322" y="187" width="98" height="32" xlink:href=""/></switch></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-29"><g><path d="M 371 551 L 371 594.63" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 371 599.88 L 367.5 592.88 L 371 594.63 L 374.5 592.88 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-44"><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 562px; margin-left: 382px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 11px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; font-weight: bold; white-space: nowrap;">no</div></div></div></foreignObject><image x="375.5" y="556" width="13" height="15.75" xlink:href=""/></switch></g></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-34"><g><path d="M 321 501 L 51 501 L 51 407.37" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 51 402.12 L 54.5 409.12 L 51 407.37 L 47.5 409.12 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-41"><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 512px; margin-left: 312px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 11px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; font-weight: bold; white-space: nowrap;">yes</div></div></div></foreignObject><image x="303" y="506" width="18" height="15.75" xlink:href=""/></switch></g></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-15"><g><path d="M 371 451 L 421 501 L 371 551 L 321 501 Z" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" stroke-miterlimit="10" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 501px; margin-left: 322px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">ACPI fixed <br />event active</div></div></div></foreignObject><image x="322" y="487" width="98" height="32" xlink:href=""/></switch></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-28"><g><path d="M 371 401 L 371 444.63" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 371 449.88 L 367.5 442.88 L 371 444.63 L 374.5 442.88 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-43"><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 412px; margin-left: 382px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 11px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; font-weight: bold; white-space: nowrap;">no</div></div></div></foreignObject><image x="375.5" y="406" width="13" height="15.75" xlink:href=""/></switch></g></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-57"><g><path d="M 321 351 L 107.37 351" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 102.12 351 L 109.12 347.5 L 107.37 351 L 109.12 354.5 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-58"><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 362px; margin-left: 312px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 11px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; font-weight: bold; white-space: nowrap;">yes</div></div></div></foreignObject><image x="303" y="356" width="18" height="15.75" xlink:href=""/></switch></g></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-16"><g><path d="M 371 301 L 421 351 L 371 401 L 321 351 Z" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" stroke-miterlimit="10" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 351px; margin-left: 322px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">IRQ other <br />than ACPI SCI active</div></div></div></foreignObject><image x="322" y="330" width="98" height="46" xlink:href=""/></switch></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-30"><g><path d="M 371 701 L 371 726 L 371 744.63" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 371 749.88 L 367.5 742.88 L 371 744.63 L 374.5 742.88 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-65"><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 715px; margin-left: 382px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 11px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; font-weight: bold; white-space: nowrap;">no</div></div></div></foreignObject><image x="375.5" y="709" width="13" height="15.75" xlink:href=""/></switch></g></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-36"><g><path d="M 321 651 L 257.37 651" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 252.12 651 L 259.12 647.5 L 257.37 651 L 259.12 654.5 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-45"><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 662px; margin-left: 302px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 11px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; font-weight: bold; white-space: nowrap;">yes</div></div></div></foreignObject><image x="293" y="656" width="18" height="15.75" xlink:href=""/></switch></g></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-17"><g><path d="M 371 601 L 421 651 L 371 701 L 321 651 Z" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" stroke-miterlimit="10" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 651px; margin-left: 322px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">GPIO<br />IRQ shared<br />with SCI</div></div></div></foreignObject><image x="322" y="630" width="98" height="46" xlink:href=""/></switch></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-31"><g><path d="M 371 851 L 371 894.63" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 371 899.88 L 367.5 892.88 L 371 894.63 L 374.5 892.88 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-32"><g><path d="M 371 1001 L 371.5 1026.5 L 371.13 1044.63" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 371.02 1049.88 L 367.67 1042.81 L 371.13 1044.63 L 374.66 1042.96 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-48"><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 1023px; margin-left: 382px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 11px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; font-weight: bold; white-space: nowrap;">no</div></div></div></foreignObject><image x="375.5" y="1017" width="13" height="15.75" xlink:href=""/></switch></g></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-52"><g><path d="M 321 1101 L 51 1101 L 51 407.37" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 51 402.12 L 54.5 409.12 L 51 407.37 L 47.5 409.12 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-53"><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 1112px; margin-left: 312px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 11px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; font-weight: bold; white-space: nowrap;">yes</div></div></div></foreignObject><image x="303" y="1106" width="18" height="15.75" xlink:href=""/></switch></g></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-62"><g><path d="M 421 1101 L 531 1101 L 531 407.37" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 531 402.12 L 534.5 409.12 L 531 407.37 L 527.5 409.12 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-64"><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 1112px; margin-left: 432px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 11px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; font-weight: bold; white-space: nowrap;">no</div></div></div></foreignObject><image x="425.5" y="1106" width="13" height="15.75" xlink:href=""/></switch></g></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-23"><g><path d="M 371 1051 L 421 1101 L 371 1151 L 321 1101 Z" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" stroke-miterlimit="10" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 1101px; margin-left: 322px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">Any PM<br />wakeup event<br />pending</div></div></div></foreignObject><image x="322" y="1080" width="98" height="46" xlink:href=""/></switch></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-24"><g><path d="M 31.61 171 L 70.39 171 C 87.29 171 101 184.43 101 201 C 101 217.57 87.29 231 70.39 231 L 31.61 231 C 14.71 231 1 217.57 1 201 C 1 184.43 14.71 171 31.61 171 Z" fill="#0cf232" stroke="#e07a5f" stroke-width="2" stroke-miterlimit="10" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 201px; margin-left: 2px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">Kernel resumes system</div></div></div></foreignObject><image x="2" y="187" width="98" height="32" xlink:href=""/></switch></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-26"><g><path d="M 51 301 L 51 237.37" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 51 232.12 L 54.5 239.12 L 51 237.37 L 47.5 239.12 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-25"><g><rect x="1" y="301" width="100" height="100" rx="7" ry="7" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 351px; margin-left: 2px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">uPEP driver removes OS_HINT</div></div></div></foreignObject><image x="2" y="330" width="98" height="46" xlink:href=""/></switch></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-37"><g><path d="M 151 651 L 51 651 L 51 407.37" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 51 402.12 L 54.5 409.12 L 51 407.37 L 47.5 409.12 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-40"><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 662px; margin-left: 142px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 11px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; font-weight: bold; white-space: nowrap;">yes</div></div></div></foreignObject><image x="133" y="656" width="18" height="15.75" xlink:href=""/></switch></g></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-60"><g><path d="M 201 701 L 201 793.2 L 313.83 793.2" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 319.08 793.2 L 312.08 796.7 L 313.83 793.2 L 312.08 789.7 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-61"><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 712px; margin-left: 211px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 11px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; font-weight: bold; white-space: nowrap;">no</div></div></div></foreignObject><image x="204.5" y="706" width="13" height="15.75" xlink:href=""/></switch></g></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-35"><g><path d="M 201 601 L 251 651 L 201 701 L 151 651 Z" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" stroke-miterlimit="10" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 651px; margin-left: 152px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">Any GPIO<br />w/ WAKESTS<br />active</div></div></div></foreignObject><image x="152" y="630" width="98" height="46" xlink:href=""/></switch></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-38"><g><rect x="321" y="751" width="100" height="100" rx="7" ry="7" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 801px; margin-left: 322px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">Check for ACPI Notify() events</div></div></div></foreignObject><image x="322" y="787" width="98" height="32" xlink:href=""/></switch></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-49"><g><path d="M 371 1001 L 371 1044.63" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 371 1049.88 L 367.5 1042.88 L 371 1044.63 L 374.5 1042.88 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-50"><g><path d="M 321 951 L 51 951 L 51 407.37" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 51 402.12 L 54.5 409.12 L 51 407.37 L 47.5 409.12 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-51"><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 962px; margin-left: 312px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 11px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; font-weight: bold; white-space: nowrap;">yes</div></div></div></foreignObject><image x="303" y="956" width="18" height="15.75" xlink:href=""/></switch></g></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-39"><g><path d="M 371 901 L 421 951 L 371 1001 L 321 951 Z" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" stroke-miterlimit="10" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 951px; margin-left: 322px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">Any GPE <br />pending</div></div></div></foreignObject><image x="322" y="937" width="98" height="32" xlink:href=""/></switch></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-63"><g><path d="M 531 301 L 531 237.37" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 531 232.12 L 534.5 239.12 L 531 237.37 L 527.5 239.12 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-54"><g><rect x="481" y="301" width="100" height="100" rx="7" ry="7" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 351px; margin-left: 482px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">OS moves active <br />core back to<br />ACPI C3</div></div></div></foreignObject><image x="482" y="330" width="98" height="46" xlink:href=""/></switch></g></g></g><g data-cell-id="jeVlbFHk8Qahp5zcIn_D-55"><g><path d="M 511.61 171 L 550.39 171 C 567.29 171 581 184.43 581 201 C 581 217.57 567.29 231 550.39 231 L 511.61 231 C 494.71 231 481 217.57 481 201 C 481 184.43 494.71 171 511.61 171 Z" fill="#f27979" stroke="#e07a5f" stroke-width="2" stroke-miterlimit="10" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 201px; margin-left: 482px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">MP1 puts system back to sleep</div></div></div></foreignObject><image x="482" y="187" width="98" height="32" xlink:href=""/></switch></g></g></g></g></g></g></svg> \ No newline at end of file
diff --git a/Documentation/arch/x86/suspend.svg b/Documentation/arch/x86/suspend.svg
new file mode 100644
index 000000000000..a69073c018d5
--- /dev/null
+++ b/Documentation/arch/x86/suspend.svg
@@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Do not edit this file with editors other than draw.io -->
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" width="407px" height="1132px" viewBox="-0.5 -0.5 407 1132" content="&lt;mxfile host=&quot;confluence.amd.com&quot; agent=&quot;Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36 Edg/134.0.0.0&quot; version=&quot;24.7.10&quot; scale=&quot;1&quot; border=&quot;0&quot;&gt;&#10; &lt;diagram id=&quot;46NsKM0iVOHTgNer6hpB&quot; name=&quot;Page-1&quot;&gt;&#10; &lt;mxGraphModel dx=&quot;1964&quot; dy=&quot;1073&quot; grid=&quot;1&quot; gridSize=&quot;10&quot; guides=&quot;1&quot; tooltips=&quot;1&quot; connect=&quot;1&quot; arrows=&quot;1&quot; fold=&quot;1&quot; page=&quot;0&quot; pageScale=&quot;1&quot; pageWidth=&quot;850&quot; pageHeight=&quot;1100&quot; math=&quot;0&quot; shadow=&quot;0&quot;&gt;&#10; &lt;root&gt;&#10; &lt;mxCell id=&quot;0&quot; /&gt;&#10; &lt;mxCell id=&quot;1&quot; parent=&quot;0&quot; /&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-21&quot; value=&quot;&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;8N6JJebqrzA787TgpwUj-4&quot; target=&quot;8N6JJebqrzA787TgpwUj-12&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-4&quot; value=&quot;SFH driver notifies MP2 to stop all sensor collection&quot; style=&quot;rounded=1;whiteSpace=wrap;html=1;absoluteArcSize=1;arcSize=14;strokeWidth=2;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;120&quot; y=&quot;420&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-6&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.5;entryY=0;entryDx=0;entryDy=0;exitX=0.5;exitY=1;exitDx=0;exitDy=0;exitPerimeter=0;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;8N6JJebqrzA787TgpwUj-51&quot; target=&quot;8N6JJebqrzA787TgpwUj-4&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;330&quot; y=&quot;400&quot; as=&quot;sourcePoint&quot; /&gt;&#10; &lt;mxPoint x=&quot;170&quot; y=&quot;450&quot; as=&quot;targetPoint&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-37&quot; value=&quot;no&quot; style=&quot;edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];labelBackgroundColor=none;fontColor=#393C56;fontStyle=1;&quot; vertex=&quot;1&quot; connectable=&quot;0&quot; parent=&quot;8N6JJebqrzA787TgpwUj-6&quot;&gt;&#10; &lt;mxGeometry x=&quot;-0.2133&quot; y=&quot;-1&quot; relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;-22&quot; y=&quot;16&quot; as=&quot;offset&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-9&quot; value=&quot;Abort suspend; details logged in dmesg&quot; style=&quot;strokeWidth=2;html=1;shape=mxgraph.flowchart.terminator;whiteSpace=wrap;labelBackgroundColor=none;fillColor=#F27979;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;425&quot; y=&quot;140&quot; width=&quot;100&quot; height=&quot;60&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-12&quot; value=&quot;Failures?&quot; style=&quot;strokeWidth=2;html=1;shape=mxgraph.flowchart.decision;whiteSpace=wrap;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;280&quot; y=&quot;420&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-18&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.5;entryY=1;entryDx=0;entryDy=0;entryPerimeter=0;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; target=&quot;8N6JJebqrzA787TgpwUj-9&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;380&quot; y=&quot;320&quot; as=&quot;sourcePoint&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-19&quot; value=&quot;yes&quot; style=&quot;edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];labelBackgroundColor=none;fontColor=#393C56;fontStyle=1;&quot; vertex=&quot;1&quot; connectable=&quot;0&quot; parent=&quot;8N6JJebqrzA787TgpwUj-18&quot;&gt;&#10; &lt;mxGeometry x=&quot;-0.3265&quot; relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;-27&quot; y=&quot;10&quot; as=&quot;offset&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-24&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.5;entryY=0;entryDx=0;entryDy=0;exitX=0.5;exitY=1;exitDx=0;exitDy=0;exitPerimeter=0;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;8N6JJebqrzA787TgpwUj-12&quot; target=&quot;8N6JJebqrzA787TgpwUj-28&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;340&quot; y=&quot;570&quot; as=&quot;sourcePoint&quot; /&gt;&#10; &lt;mxPoint x=&quot;180&quot; y=&quot;620&quot; as=&quot;targetPoint&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-38&quot; value=&quot;no&quot; style=&quot;edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];labelBackgroundColor=none;fontColor=#393C56;fontStyle=1;&quot; vertex=&quot;1&quot; connectable=&quot;0&quot; parent=&quot;8N6JJebqrzA787TgpwUj-24&quot;&gt;&#10; &lt;mxGeometry x=&quot;-0.0038&quot; y=&quot;2&quot; relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint y=&quot;13&quot; as=&quot;offset&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-26&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.5;entryY=1;entryDx=0;entryDy=0;entryPerimeter=0;exitX=1;exitY=0.5;exitDx=0;exitDy=0;exitPerimeter=0;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;8N6JJebqrzA787TgpwUj-12&quot; target=&quot;8N6JJebqrzA787TgpwUj-9&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;410&quot; y=&quot;530&quot; as=&quot;sourcePoint&quot; /&gt;&#10; &lt;mxPoint x=&quot;555&quot; y=&quot;230&quot; as=&quot;targetPoint&quot; /&gt;&#10; &lt;Array as=&quot;points&quot;&gt;&#10; &lt;mxPoint x=&quot;475&quot; y=&quot;470&quot; /&gt;&#10; &lt;/Array&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-35&quot; value=&quot;yes&quot; style=&quot;edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];labelBackgroundColor=none;fontColor=#393C56;fontStyle=1;&quot; vertex=&quot;1&quot; connectable=&quot;0&quot; parent=&quot;8N6JJebqrzA787TgpwUj-26&quot;&gt;&#10; &lt;mxGeometry x=&quot;-0.7458&quot; relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;-1&quot; y=&quot;10&quot; as=&quot;offset&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-30&quot; value=&quot;&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;8N6JJebqrzA787TgpwUj-28&quot; target=&quot;8N6JJebqrzA787TgpwUj-29&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-28&quot; value=&quot;All devices go into deepest D-state or F-state&quot; style=&quot;rounded=1;whiteSpace=wrap;html=1;absoluteArcSize=1;arcSize=14;strokeWidth=2;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;120&quot; y=&quot;570&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-29&quot; value=&quot;Failures?&quot; style=&quot;strokeWidth=2;html=1;shape=mxgraph.flowchart.decision;whiteSpace=wrap;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;280&quot; y=&quot;570&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-31&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0.5;exitY=1;exitDx=0;exitDy=0;exitPerimeter=0;entryX=0.5;entryY=0;entryDx=0;entryDy=0;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;8N6JJebqrzA787TgpwUj-29&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;330&quot; y=&quot;760&quot; as=&quot;sourcePoint&quot; /&gt;&#10; &lt;mxPoint x=&quot;170&quot; y=&quot;720&quot; as=&quot;targetPoint&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-64&quot; value=&quot;no&quot; style=&quot;edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];strokeColor=#E07A5F;fontColor=#393C56;fillColor=#F2CC8F;fontStyle=1;&quot; vertex=&quot;1&quot; connectable=&quot;0&quot; parent=&quot;8N6JJebqrzA787TgpwUj-31&quot;&gt;&#10; &lt;mxGeometry x=&quot;-0.0683&quot; relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint y=&quot;15&quot; as=&quot;offset&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-34&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.5;entryY=1;entryDx=0;entryDy=0;entryPerimeter=0;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;8N6JJebqrzA787TgpwUj-29&quot; target=&quot;8N6JJebqrzA787TgpwUj-9&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-36&quot; value=&quot;yes&quot; style=&quot;edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];labelBackgroundColor=none;fontColor=#393C56;fontStyle=1;&quot; vertex=&quot;1&quot; connectable=&quot;0&quot; parent=&quot;8N6JJebqrzA787TgpwUj-34&quot;&gt;&#10; &lt;mxGeometry x=&quot;-0.8315&quot; y=&quot;-1&quot; relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;2&quot; y=&quot;9&quot; as=&quot;offset&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-46&quot; value=&quot;GPIO driver suspends non-wake GPIOs&quot; style=&quot;rounded=1;whiteSpace=wrap;html=1;absoluteArcSize=1;arcSize=14;strokeWidth=2;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;120&quot; y=&quot;720&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-47&quot; value=&quot;&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;8N6JJebqrzA787TgpwUj-48&quot; target=&quot;8N6JJebqrzA787TgpwUj-50&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-48&quot; value=&quot;Suspend initiated from userspace&quot; style=&quot;strokeWidth=2;html=1;shape=mxgraph.flowchart.start_2;whiteSpace=wrap;labelBackgroundColor=none;fillColor=#0CF232;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;120&quot; y=&quot;120&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-49&quot; value=&quot;&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;8N6JJebqrzA787TgpwUj-50&quot; target=&quot;8N6JJebqrzA787TgpwUj-51&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-50&quot; value=&quot;GPU driver shuts down clocks and sends SMU messages&quot; style=&quot;rounded=1;whiteSpace=wrap;html=1;absoluteArcSize=1;arcSize=14;strokeWidth=2;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;120&quot; y=&quot;270&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-51&quot; value=&quot;Failures?&quot; style=&quot;strokeWidth=2;html=1;shape=mxgraph.flowchart.decision;whiteSpace=wrap;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;280&quot; y=&quot;270&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-53&quot; value=&quot;&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;8N6JJebqrzA787TgpwUj-54&quot; target=&quot;8N6JJebqrzA787TgpwUj-56&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-54&quot; value=&quot;ACPI s2idle driver notifies EC using _DSM&quot; style=&quot;rounded=1;whiteSpace=wrap;html=1;absoluteArcSize=1;arcSize=14;strokeWidth=2;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;120&quot; y=&quot;870&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-55&quot; value=&quot;&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;8N6JJebqrzA787TgpwUj-56&quot; target=&quot;8N6JJebqrzA787TgpwUj-58&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-56&quot; value=&quot;uPEP driver (amd-pmc) sends OS_HINT&quot; style=&quot;rounded=1;whiteSpace=wrap;html=1;absoluteArcSize=1;arcSize=14;strokeWidth=2;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;120&quot; y=&quot;1010&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-57&quot; value=&quot;&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;labelBackgroundColor=none;strokeColor=#E07A5F;fontColor=default;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;8N6JJebqrzA787TgpwUj-58&quot; target=&quot;8N6JJebqrzA787TgpwUj-59&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-58&quot; value=&quot;Put all x86 CPU cores into ACPI C3&quot; style=&quot;rounded=1;whiteSpace=wrap;html=1;absoluteArcSize=1;arcSize=14;strokeWidth=2;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;120&quot; y=&quot;1150&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-59&quot; value=&quot;s2idle loop waiting for IRQ &amp;lt;br&amp;gt;to wake&quot; style=&quot;strokeWidth=2;html=1;shape=mxgraph.flowchart.terminator;whiteSpace=wrap;labelBackgroundColor=none;fillColor=#0CF232;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;280&quot; y=&quot;1170&quot; width=&quot;100&quot; height=&quot;60&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-65&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0.5;exitY=1;exitDx=0;exitDy=0;exitPerimeter=0;entryX=0.5;entryY=0;entryDx=0;entryDy=0;strokeColor=#E07A5F;fontColor=#393C56;fillColor=#F2CC8F;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;8N6JJebqrzA787TgpwUj-60&quot; target=&quot;8N6JJebqrzA787TgpwUj-54&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-66&quot; value=&quot;no&quot; style=&quot;edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];strokeColor=#E07A5F;fontColor=#393C56;fillColor=#F2CC8F;fontStyle=1;&quot; vertex=&quot;1&quot; connectable=&quot;0&quot; parent=&quot;8N6JJebqrzA787TgpwUj-65&quot;&gt;&#10; &lt;mxGeometry x=&quot;-0.144&quot; y=&quot;-4&quot; relative=&quot;1&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;-4&quot; y=&quot;14&quot; as=&quot;offset&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-60&quot; value=&quot;Failures?&quot; style=&quot;strokeWidth=2;html=1;shape=mxgraph.flowchart.decision;whiteSpace=wrap;labelBackgroundColor=none;fillColor=#F2CC8F;strokeColor=#E07A5F;fontColor=#393C56;&quot; vertex=&quot;1&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;280&quot; y=&quot;720&quot; width=&quot;100&quot; height=&quot;100&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-61&quot; style=&quot;edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.5;entryY=1;entryDx=0;entryDy=0;entryPerimeter=0;strokeColor=#E07A5F;fontColor=#393C56;fillColor=#F2CC8F;&quot; edge=&quot;1&quot; parent=&quot;1&quot; source=&quot;8N6JJebqrzA787TgpwUj-60&quot; target=&quot;8N6JJebqrzA787TgpwUj-9&quot;&gt;&#10; &lt;mxGeometry relative=&quot;1&quot; as=&quot;geometry&quot; /&gt;&#10; &lt;/mxCell&gt;&#10; &lt;mxCell id=&quot;8N6JJebqrzA787TgpwUj-62&quot; value=&quot;yes&quot; style=&quot;edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];labelBackgroundColor=none;fontColor=#393C56;fontStyle=1;&quot; vertex=&quot;1&quot; connectable=&quot;0&quot; parent=&quot;1&quot;&gt;&#10; &lt;mxGeometry x=&quot;440&quot; y=&quot;620&quot; as=&quot;geometry&quot;&gt;&#10; &lt;mxPoint x=&quot;-14&quot; y=&quot;160&quot; as=&quot;offset&quot; /&gt;&#10; &lt;/mxGeometry&gt;&#10; &lt;/mxCell&gt;&#10; &lt;/root&gt;&#10; &lt;/mxGraphModel&gt;&#10; &lt;/diagram&gt;&#10;&lt;/mxfile&gt;&#10;"><defs/><g><g data-cell-id="0"><g data-cell-id="1"><g data-cell-id="8N6JJebqrzA787TgpwUj-21"><g><path d="M 101 351 L 154.63 351" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 159.88 351 L 152.88 354.5 L 154.63 351 L 152.88 347.5 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-4"><g><rect x="1" y="301" width="100" height="100" rx="7" ry="7" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 351px; margin-left: 2px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">SFH driver notifies MP2 to stop all sensor collection</div></div></div></foreignObject><image x="2" y="322.5" width="98" height="61" xlink:href=""/></switch></g></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-6"><g><path d="M 211 251 L 211 276 L 51 276 L 51 294.63" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 51 299.88 L 47.5 292.88 L 51 294.63 L 54.5 292.88 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g><g data-cell-id="8N6JJebqrzA787TgpwUj-37"><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 292px; margin-left: 132px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 11px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; font-weight: bold; white-space: nowrap;">no</div></div></div></foreignObject><image x="125.5" y="286" width="13" height="15.75" xlink:href=""/></switch></g></g></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-9"><g><path d="M 336.61 21 L 375.39 21 C 392.29 21 406 34.43 406 51 C 406 67.57 392.29 81 375.39 81 L 336.61 81 C 319.71 81 306 67.57 306 51 C 306 34.43 319.71 21 336.61 21 Z" fill="#f27979" stroke="#e07a5f" stroke-width="2" stroke-miterlimit="10" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 51px; margin-left: 307px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">Abort suspend; details logged in dmesg</div></div></div></foreignObject><image x="307" y="30" width="98" height="46" xlink:href=""/></switch></g></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-12"><g><path d="M 211 301 L 261 351 L 211 401 L 161 351 Z" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" stroke-miterlimit="10" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 351px; margin-left: 162px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">Failures?</div></div></div></foreignObject><image x="162" y="344.5" width="98" height="17" xlink:href=""/></switch></g></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-18"><g><path d="M 261 201 L 356 201.5 L 356 87.37" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 356 82.12 L 359.5 89.12 L 356 87.37 L 352.5 89.12 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g><g data-cell-id="8N6JJebqrzA787TgpwUj-19"><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 212px; margin-left: 307px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 11px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; font-weight: bold; white-space: nowrap;">yes</div></div></div></foreignObject><image x="298" y="206" width="18" height="15.75" xlink:href=""/></switch></g></g></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-24"><g><path d="M 211 401 L 211 426 L 51 426 L 51 444.63" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 51 449.88 L 47.5 442.88 L 51 444.63 L 54.5 442.88 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g><g data-cell-id="8N6JJebqrzA787TgpwUj-38"><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 442px; margin-left: 132px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 11px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; font-weight: bold; white-space: nowrap;">no</div></div></div></foreignObject><image x="125.5" y="436" width="13" height="15.75" xlink:href=""/></switch></g></g></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-26"><g><path d="M 261 351 L 356 351 L 356 87.37" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 356 82.12 L 359.5 89.12 L 356 87.37 L 352.5 89.12 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g><g data-cell-id="8N6JJebqrzA787TgpwUj-35"><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 362px; margin-left: 307px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 11px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; font-weight: bold; white-space: nowrap;">yes</div></div></div></foreignObject><image x="298" y="356" width="18" height="15.75" xlink:href=""/></switch></g></g></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-30"><g><path d="M 101 501 L 154.63 501" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 159.88 501 L 152.88 504.5 L 154.63 501 L 152.88 497.5 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-28"><g><rect x="1" y="451" width="100" height="100" rx="7" ry="7" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 501px; margin-left: 2px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">All devices go into deepest D-state or F-state</div></div></div></foreignObject><image x="2" y="480" width="98" height="46" xlink:href=""/></switch></g></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-29"><g><path d="M 211 451 L 261 501 L 211 551 L 161 501 Z" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" stroke-miterlimit="10" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 501px; margin-left: 162px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">Failures?</div></div></div></foreignObject><image x="162" y="494.5" width="98" height="17" xlink:href=""/></switch></g></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-31"><g><path d="M 211 551 L 211 576 L 51.5 576 L 51.13 594.63" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 51.02 599.88 L 47.66 592.81 L 51.13 594.63 L 54.66 592.95 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g><g data-cell-id="8N6JJebqrzA787TgpwUj-64"><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 592px; margin-left: 139px;"><div data-drawio-colors="color: #393C56; background-color: rgb(255, 255, 255); " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 11px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; font-weight: bold; background-color: rgb(255, 255, 255); white-space: nowrap;">no</div></div></div></foreignObject><image x="132.5" y="586" width="13" height="15.75" xlink:href=""/></switch></g></g></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-34"><g><path d="M 261 501 L 356 501 L 356 87.37" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 356 82.12 L 359.5 89.12 L 356 87.37 L 352.5 89.12 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g><g data-cell-id="8N6JJebqrzA787TgpwUj-36"><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 512px; margin-left: 307px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 11px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; font-weight: bold; white-space: nowrap;">yes</div></div></div></foreignObject><image x="298" y="506" width="18" height="15.75" xlink:href=""/></switch></g></g></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-46"><g><rect x="1" y="601" width="100" height="100" rx="7" ry="7" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 651px; margin-left: 2px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">GPIO driver suspends non-wake GPIOs</div></div></div></foreignObject><image x="2" y="630" width="98" height="46" xlink:href=""/></switch></g></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-47"><g><path d="M 51 101 L 51 144.63" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 51 149.88 L 47.5 142.88 L 51 144.63 L 54.5 142.88 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-48"><g><ellipse cx="51" cy="51" rx="50" ry="50" fill="#0cf232" stroke="#e07a5f" stroke-width="2" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 51px; margin-left: 2px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">Suspend initiated from userspace</div></div></div></foreignObject><image x="2" y="37" width="98" height="32" xlink:href=""/></switch></g></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-49"><g><path d="M 101 201 L 154.63 201" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 159.88 201 L 152.88 204.5 L 154.63 201 L 152.88 197.5 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-50"><g><rect x="1" y="151" width="100" height="100" rx="7" ry="7" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 201px; margin-left: 2px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">GPU driver shuts down clocks and sends SMU messages</div></div></div></foreignObject><image x="2" y="172.5" width="98" height="61" xlink:href=""/></switch></g></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-51"><g><path d="M 211 151 L 261 201 L 211 251 L 161 201 Z" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" stroke-miterlimit="10" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 201px; margin-left: 162px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">Failures?</div></div></div></foreignObject><image x="162" y="194.5" width="98" height="17" xlink:href=""/></switch></g></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-53"><g><path d="M 51 851 L 51 884.63" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 51 889.88 L 47.5 882.88 L 51 884.63 L 54.5 882.88 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-54"><g><rect x="1" y="751" width="100" height="100" rx="7" ry="7" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 801px; margin-left: 2px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">ACPI s2idle driver notifies EC using _DSM</div></div></div></foreignObject><image x="2" y="780" width="98" height="46" xlink:href=""/></switch></g></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-55"><g><path d="M 51 991 L 51 1024.63" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 51 1029.88 L 47.5 1022.88 L 51 1024.63 L 54.5 1022.88 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-56"><g><rect x="1" y="891" width="100" height="100" rx="7" ry="7" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 941px; margin-left: 2px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">uPEP driver (amd-pmc) sends OS_HINT</div></div></div></foreignObject><image x="2" y="920" width="98" height="46" xlink:href=""/></switch></g></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-57"><g><path d="M 101 1081 L 154.63 1081" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 159.88 1081 L 152.88 1084.5 L 154.63 1081 L 152.88 1077.5 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-58"><g><rect x="1" y="1031" width="100" height="100" rx="7" ry="7" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 1081px; margin-left: 2px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">Put all x86 CPU cores into ACPI C3</div></div></div></foreignObject><image x="2" y="1060" width="98" height="46" xlink:href=""/></switch></g></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-59"><g><path d="M 191.61 1051 L 230.39 1051 C 247.29 1051 261 1064.43 261 1081 C 261 1097.57 247.29 1111 230.39 1111 L 191.61 1111 C 174.71 1111 161 1097.57 161 1081 C 161 1064.43 174.71 1051 191.61 1051 Z" fill="#0cf232" stroke="#e07a5f" stroke-width="2" stroke-miterlimit="10" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 1081px; margin-left: 162px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">s2idle loop waiting for IRQ <br />to wake</div></div></div></foreignObject><image x="162" y="1060" width="98" height="46" xlink:href=""/></switch></g></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-65"><g><path d="M 211 701 L 211 726 L 51 726 L 51 744.63" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 51 749.88 L 47.5 742.88 L 51 744.63 L 54.5 742.88 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g><g data-cell-id="8N6JJebqrzA787TgpwUj-66"><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 737px; margin-left: 143px;"><div data-drawio-colors="color: #393C56; background-color: rgb(255, 255, 255); " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 11px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; font-weight: bold; background-color: rgb(255, 255, 255); white-space: nowrap;">no</div></div></div></foreignObject><image x="136.5" y="731" width="13" height="15.75" xlink:href=""/></switch></g></g></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-60"><g><path d="M 211 601 L 261 651 L 211 701 L 161 651 Z" fill="#f2cc8f" stroke="#e07a5f" stroke-width="2" stroke-miterlimit="10" pointer-events="all"/></g><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 98px; height: 1px; padding-top: 651px; margin-left: 162px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; white-space: normal; overflow-wrap: normal;">Failures?</div></div></div></foreignObject><image x="162" y="644.5" width="98" height="17" xlink:href=""/></switch></g></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-61"><g><path d="M 261 651 L 356 651 L 356 87.37" fill="none" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="stroke"/><path d="M 356 82.12 L 359.5 89.12 L 356 87.37 L 352.5 89.12 Z" fill="#e07a5f" stroke="#e07a5f" stroke-miterlimit="10" pointer-events="all"/></g></g><g data-cell-id="8N6JJebqrzA787TgpwUj-62"><g><g transform="translate(-0.5 -0.5)"><switch><foreignObject pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility" style="overflow: visible; text-align: left;"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 1px; height: 1px; padding-top: 662px; margin-left: 308px;"><div data-drawio-colors="color: #393C56; " style="box-sizing: border-box; font-size: 0px; text-align: center;"><div style="display: inline-block; font-size: 11px; font-family: Helvetica; color: rgb(57, 60, 86); line-height: 1.2; pointer-events: all; font-weight: bold; white-space: nowrap;">yes</div></div></div></foreignObject><image x="299" y="656" width="18" height="15.75" xlink:href=""/></switch></g></g></g></g></g></g></svg> \ No newline at end of file
diff --git a/Documentation/arch/x86/sva.rst b/Documentation/arch/x86/sva.rst
index 33cb05005982..6a759984d471 100644
--- a/Documentation/arch/x86/sva.rst
+++ b/Documentation/arch/x86/sva.rst
@@ -25,7 +25,7 @@ to cache translations for virtual addresses. The IOMMU driver uses the
mmu_notifier() support to keep the device TLB cache and the CPU cache in
sync. When an ATS lookup fails for a virtual address, the device should
use the PRI in order to request the virtual address to be paged into the
-CPU page tables. The device must use ATS again in order the fetch the
+CPU page tables. The device must use ATS again in order to fetch the
translation before use.
Shared Hardware Workqueues
@@ -216,7 +216,7 @@ submitting work and processing completions.
Single Root I/O Virtualization (SR-IOV) focuses on providing independent
hardware interfaces for virtualizing hardware. Hence, it's required to be
-almost fully functional interface to software supporting the traditional
+an almost fully functional interface to software supporting the traditional
BARs, space for interrupts via MSI-X, its own register layout.
Virtual Functions (VFs) are assisted by the Physical Function (PF)
driver.
diff --git a/Documentation/arch/x86/tdx.rst b/Documentation/arch/x86/tdx.rst
index 719043cd8b46..61670e7df2f7 100644
--- a/Documentation/arch/x86/tdx.rst
+++ b/Documentation/arch/x86/tdx.rst
@@ -142,13 +142,6 @@ but depends on the BIOS to behave correctly.
Note TDX works with CPU logical online/offline, thus the kernel still
allows to offline logical CPU and online it again.
-Kexec()
-~~~~~~~
-
-TDX host support currently lacks the ability to handle kexec. For
-simplicity only one of them can be enabled in the Kconfig. This will be
-fixed in the future.
-
Erratum
~~~~~~~
@@ -171,6 +164,13 @@ If the platform has such erratum, the kernel prints additional message in
machine check handler to tell user the machine check may be caused by
kernel bug on TDX private memory.
+Kexec
+~~~~~~~
+
+Currently kexec doesn't work on the TDX platforms with the aforementioned
+erratum. It fails when loading the kexec kernel image. Otherwise it
+works normally.
+
Interaction vs S3 and deeper states
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/Documentation/arch/x86/topology.rst b/Documentation/arch/x86/topology.rst
index 08ebf9edbfc1..86bec8ac2c4d 100644
--- a/Documentation/arch/x86/topology.rst
+++ b/Documentation/arch/x86/topology.rst
@@ -47,17 +47,21 @@ AMD nomenclature for package is 'Node'.
Package-related topology information in the kernel:
- - cpuinfo_x86.x86_max_cores:
+ - topology_num_threads_per_package()
- The number of cores in a package. This information is retrieved via CPUID.
+ The number of threads in a package.
- - cpuinfo_x86.x86_max_dies:
+ - topology_num_cores_per_package()
- The number of dies in a package. This information is retrieved via CPUID.
+ The number of cores in a package.
+
+ - topology_max_dies_per_package()
+
+ The maximum number of dies in a package.
- cpuinfo_x86.topo.die_id:
- The physical ID of the die. This information is retrieved via CPUID.
+ The physical ID of the die.
- cpuinfo_x86.topo.pkg_id:
@@ -96,16 +100,6 @@ are SMT- or CMT-type threads.
AMDs nomenclature for a CMT core is "Compute Unit". The kernel always uses
"core".
-Core-related topology information in the kernel:
-
- - smp_num_siblings:
-
- The number of threads in a core. The number of threads in a package can be
- calculated by::
-
- threads_per_package = cpuinfo_x86.x86_max_cores * smp_num_siblings
-
-
Threads
=======
A thread is a single scheduling unit. It's the equivalent to a logical Linux
@@ -141,6 +135,201 @@ Thread-related topology information in the kernel:
The ID of the core to which a thread belongs. It is also printed in /proc/cpuinfo
"core_id."
+ - topology_logical_core_id();
+
+ The logical core ID to which a thread belongs.
+
+
+
+System topology enumeration
+===========================
+
+The topology on x86 systems can be discovered using a combination of vendor
+specific CPUID leaves which enumerate the processor topology and the cache
+hierarchy.
+
+The CPUID leaves in their preferred order of parsing for each x86 vendor is as
+follows:
+
+1) AMD
+
+ 1) CPUID leaf 0x80000026 [Extended CPU Topology] (Core::X86::Cpuid::ExCpuTopology)
+
+ The extended CPUID leaf 0x80000026 is the extension of the CPUID leaf 0xB
+ and provides the topology information of Core, Complex, CCD (Die), and
+ Socket in each level.
+
+ Support for the leaf is discovered by checking if the maximum extended
+ CPUID level is >= 0x80000026 and then checking if `LogProcAtThisLevel`
+ in `EBX[15:0]` at a particular level (starting from 0) is non-zero.
+
+ The `LevelType` in `ECX[15:8]` at the level provides the topology domain
+ the level describes - Core, Complex, CCD(Die), or the Socket.
+
+ The kernel uses the `CoreMaskWidth` from `EAX[4:0]` to discover the
+ number of bits that need to be right-shifted from `ExtendedLocalApicId`
+ in `EDX[31:0]` in order to get a unique Topology ID for the topology
+ level. CPUs with the same Topology ID share the resources at that level.
+
+ CPUID leaf 0x80000026 also provides more information regarding the power
+ and efficiency rankings, and about the core type on AMD processors with
+ heterogeneous characteristics.
+
+ If CPUID leaf 0x80000026 is supported, further parsing is not required.
+
+ 2) CPUID leaf 0x0000000B [Extended Topology Enumeration] (Core::X86::Cpuid::ExtTopEnum)
+
+ The extended CPUID leaf 0x0000000B is the predecessor on the extended
+ CPUID leaf 0x80000026 and only describes the core, and the socket domains
+ of the processor topology.
+
+ The support for the leaf is discovered by checking if the maximum supported
+ CPUID level is >= 0xB and then if `EBX[31:0]` at a particular level
+ (starting from 0) is non-zero.
+
+ The `LevelType` in `ECX[15:8]` at the level provides the topology domain
+ that the level describes - Thread, or Processor (Socket).
+
+ The kernel uses the `CoreMaskWidth` from `EAX[4:0]` to discover the
+ number of bits that need to be right-shifted from the `ExtendedLocalApicId`
+ in `EDX[31:0]` to get a unique Topology ID for that topology level. CPUs
+ sharing the Topology ID share the resources at that level.
+
+ If CPUID leaf 0xB is supported, further parsing is not required.
+
+
+ 3) CPUID leaf 0x80000008 ECX [Size Identifiers] (Core::X86::Cpuid::SizeId)
+
+ If neither the CPUID leaf 0x80000026 nor 0xB is supported, the number of
+ CPUs on the package is detected using the Size Identifier leaf
+ 0x80000008 ECX.
+
+ The support for the leaf is discovered by checking if the supported
+ extended CPUID level is >= 0x80000008.
+
+ The shifts from the APIC ID for the Socket ID is calculated from the
+ `ApicIdSize` field in `ECX[15:12]` if it is non-zero.
+
+ If `ApicIdSize` is reported to be zero, the shift is calculated as the
+ order of the `number of threads` calculated from `NC` field in
+ `ECX[7:0]` which describes the `number of threads - 1` on the package.
+
+ Unless Extended APIC ID is supported, the APIC ID used to find the
+ Socket ID is from the `LocalApicId` field of CPUID leaf 0x00000001
+ `EBX[31:24]`.
+
+ The topology parsing continues to detect if Extended APIC ID is
+ supported or not.
+
+
+ 4) CPUID leaf 0x8000001E [Extended APIC ID, Core Identifiers, Node Identifiers]
+ (Core::X86::Cpuid::{ExtApicId,CoreId,NodeId})
+
+ The support for Extended APIC ID can be detected by checking for the
+ presence of `TopologyExtensions` in `ECX[22]` of CPUID leaf 0x80000001
+ [Feature Identifiers] (Core::X86::Cpuid::FeatureExtIdEcx).
+
+ If Topology Extensions is supported, the APIC ID from `ExtendedApicId`
+ from CPUID leaf 0x8000001E `EAX[31:0]` should be preferred over that from
+ `LocalApicId` field of CPUID leaf 0x00000001 `EBX[31:24]` for topology
+ enumeration.
+
+ On processors of Family 0x17 and above that do not support CPUID leaf
+ 0x80000026 or CPUID leaf 0xB, the shifts from the APIC ID for the Core
+ ID is calculated using the order of `number of threads per core`
+ calculated using the `ThreadsPerCore` field in `EBX[15:8]` which
+ describes `number of threads per core - 1`.
+
+ On Processors of Family 0x15, the Core ID from `EBX[7:0]` is used as the
+ `cu_id` (Compute Unit ID) to detect CPUs that share the compute units.
+
+
+ All AMD processors that support the `TopologyExtensions` feature store the
+ `NodeId` from the `ECX[7:0]` of CPUID leaf 0x8000001E
+ (Core::X86::Cpuid::NodeId) as the per-CPU `node_id`. On older processors,
+ the `node_id` was discovered using MSR_FAM10H_NODE_ID MSR (MSR
+ 0x0xc001_100c). The presence of the NODE_ID MSR was detected by checking
+ `ECX[19]` of CPUID leaf 0x80000001 [Feature Identifiers]
+ (Core::X86::Cpuid::FeatureExtIdEcx).
+
+
+2) Intel
+
+ On Intel platforms, the CPUID leaves that enumerate the processor
+ topology are as follows:
+
+ 1) CPUID leaf 0x1F (V2 Extended Topology Enumeration Leaf)
+
+ The CPUID leaf 0x1F is the extension of the CPUID leaf 0xB and provides
+ the topology information of Core, Module, Tile, Die, DieGrp, and Socket
+ in each level.
+
+ The support for the leaf is discovered by checking if the supported
+ CPUID level is >= 0x1F and then `EBX[31:0]` at a particular level
+ (starting from 0) is non-zero.
+
+ The `Domain Type` in `ECX[15:8]` of the sub-leaf provides the topology
+ domain that the level describes - Core, Module, Tile, Die, DieGrp, and
+ Socket.
+
+ The kernel uses the value from `EAX[4:0]` to discover the number of
+ bits that need to be right shifted from the `x2APIC ID` in `EDX[31:0]`
+ to get a unique Topology ID for the topology level. CPUs with the same
+ Topology ID share the resources at that level.
+
+ If CPUID leaf 0x1F is supported, further parsing is not required.
+
+
+ 2) CPUID leaf 0x0000000B (Extended Topology Enumeration Leaf)
+
+ The extended CPUID leaf 0x0000000B is the predecessor of the V2 Extended
+ Topology Enumeration Leaf 0x1F and only describes the core, and the
+ socket domains of the processor topology.
+
+ The support for the leaf is iscovered by checking if the supported CPUID
+ level is >= 0xB and then checking if `EBX[31:0]` at a particular level
+ (starting from 0) is non-zero.
+
+ CPUID leaf 0x0000000B shares the same layout as CPUID leaf 0x1F and
+ should be enumerated in a similar manner.
+
+ If CPUID leaf 0xB is supported, further parsing is not required.
+
+
+ 3) CPUID leaf 0x00000004 (Deterministic Cache Parameters Leaf)
+
+ On Intel processors that support neither CPUID leaf 0x1F, nor CPUID leaf
+ 0xB, the shifts for the SMT domains is calculated using the number of
+ CPUs sharing the L1 cache.
+
+ Processors that feature Hyper-Threading is detected using `EDX[28]` of
+ CPUID leaf 0x1 (Basic CPUID Information).
+
+ The order of `Maximum number of addressable IDs for logical processors
+ sharing this cache` from `EAX[25:14]` of level-0 of CPUID 0x4 provides
+ the shifts from the APIC ID required to compute the Core ID.
+
+ The APIC ID and Package information is computed using the data from
+ CPUID leaf 0x1.
+
+
+ 4) CPUID leaf 0x00000001 (Basic CPUID Information)
+
+ The mask and shifts to derive the Physical Package (socket) ID is
+ computed using the `Maximum number of addressable IDs for logical
+ processors in this physical package` from `EBX[23:16]` of CPUID leaf
+ 0x1.
+
+ The APIC ID on the legacy platforms is derived from the `Initial APIC
+ ID` field from `EBX[31:24]` of CPUID leaf 0x1.
+
+
+3) Centaur and Zhaoxin
+
+ Similar to Intel, Centaur and Zhaoxin use a combination of CPUID leaf
+ 0x00000004 (Deterministic Cache Parameters Leaf) and CPUID leaf 0x00000001
+ (Basic CPUID Information) to derive the topology information.
+
System topology examples
diff --git a/Documentation/arch/x86/usb-legacy-support.rst b/Documentation/arch/x86/usb-legacy-support.rst
index e01c08b7c981..b17bf122270a 100644
--- a/Documentation/arch/x86/usb-legacy-support.rst
+++ b/Documentation/arch/x86/usb-legacy-support.rst
@@ -20,11 +20,7 @@ It has several drawbacks, though:
features (wheel, extra buttons, touchpad mode) of the real PS/2 mouse may
not be available.
-2) If CONFIG_HIGHMEM64G is enabled, the PS/2 mouse emulation can cause
- system crashes, because the SMM BIOS is not expecting to be in PAE mode.
- The Intel E7505 is a typical machine where this happens.
-
-3) If AMD64 64-bit mode is enabled, again system crashes often happen,
+2) If AMD64 64-bit mode is enabled, again system crashes often happen,
because the SMM BIOS isn't expecting the CPU to be in 64-bit mode. The
BIOS manufacturers only test with Windows, and Windows doesn't do 64-bit
yet.
@@ -38,11 +34,6 @@ Problem 1)
compiled-in, too.
Problem 2)
- can currently only be solved by either disabling HIGHMEM64G
- in the kernel config or USB Legacy support in the BIOS. A BIOS update
- could help, but so far no such update exists.
-
-Problem 3)
is usually fixed by a BIOS update. Check the board
manufacturers web site. If an update is not available, disable USB
Legacy support in the BIOS. If this alone doesn't help, try also adding
diff --git a/Documentation/arch/x86/x86_64/5level-paging.rst b/Documentation/arch/x86/x86_64/5level-paging.rst
index 71f882f4a173..ad7ddc13f79d 100644
--- a/Documentation/arch/x86/x86_64/5level-paging.rst
+++ b/Documentation/arch/x86/x86_64/5level-paging.rst
@@ -22,15 +22,6 @@ QEMU 2.9 and later support 5-level paging.
Virtual memory layout for 5-level paging is described in
Documentation/arch/x86/x86_64/mm.rst
-
-Enabling 5-level paging
-=======================
-CONFIG_X86_5LEVEL=y enables the feature.
-
-Kernel with CONFIG_X86_5LEVEL=y still able to boot on 4-level hardware.
-In this case additional page table level -- p4d -- will be folded at
-runtime.
-
User-space and large virtual address space
==========================================
On x86, 5-level paging enables 56-bit userspace virtual address space.
diff --git a/Documentation/arch/x86/x86_64/boot-options.rst b/Documentation/arch/x86/x86_64/boot-options.rst
deleted file mode 100644
index 137432d34109..000000000000
--- a/Documentation/arch/x86/x86_64/boot-options.rst
+++ /dev/null
@@ -1,319 +0,0 @@
-.. SPDX-License-Identifier: GPL-2.0
-
-===========================
-AMD64 Specific Boot Options
-===========================
-
-There are many others (usually documented in driver documentation), but
-only the AMD64 specific ones are listed here.
-
-Machine check
-=============
-Please see Documentation/arch/x86/x86_64/machinecheck.rst for sysfs runtime tunables.
-
- mce=off
- Disable machine check
- mce=no_cmci
- Disable CMCI(Corrected Machine Check Interrupt) that
- Intel processor supports. Usually this disablement is
- not recommended, but it might be handy if your hardware
- is misbehaving.
- Note that you'll get more problems without CMCI than with
- due to the shared banks, i.e. you might get duplicated
- error logs.
- mce=dont_log_ce
- Don't make logs for corrected errors. All events reported
- as corrected are silently cleared by OS.
- This option will be useful if you have no interest in any
- of corrected errors.
- mce=ignore_ce
- Disable features for corrected errors, e.g. polling timer
- and CMCI. All events reported as corrected are not cleared
- by OS and remained in its error banks.
- Usually this disablement is not recommended, however if
- there is an agent checking/clearing corrected errors
- (e.g. BIOS or hardware monitoring applications), conflicting
- with OS's error handling, and you cannot deactivate the agent,
- then this option will be a help.
- mce=no_lmce
- Do not opt-in to Local MCE delivery. Use legacy method
- to broadcast MCEs.
- mce=bootlog
- Enable logging of machine checks left over from booting.
- Disabled by default on AMD Fam10h and older because some BIOS
- leave bogus ones.
- If your BIOS doesn't do that it's a good idea to enable though
- to make sure you log even machine check events that result
- in a reboot. On Intel systems it is enabled by default.
- mce=nobootlog
- Disable boot machine check logging.
- mce=monarchtimeout (number)
- monarchtimeout:
- Sets the time in us to wait for other CPUs on machine checks. 0
- to disable.
- mce=bios_cmci_threshold
- Don't overwrite the bios-set CMCI threshold. This boot option
- prevents Linux from overwriting the CMCI threshold set by the
- bios. Without this option, Linux always sets the CMCI
- threshold to 1. Enabling this may make memory predictive failure
- analysis less effective if the bios sets thresholds for memory
- errors since we will not see details for all errors.
- mce=recovery
- Force-enable recoverable machine check code paths
-
- nomce (for compatibility with i386)
- same as mce=off
-
- Everything else is in sysfs now.
-
-APICs
-=====
-
- apic
- Use IO-APIC. Default
-
- noapic
- Don't use the IO-APIC.
-
- disableapic
- Don't use the local APIC
-
- nolapic
- Don't use the local APIC (alias for i386 compatibility)
-
- pirq=...
- See Documentation/arch/x86/i386/IO-APIC.rst
-
- noapictimer
- Don't set up the APIC timer
-
- no_timer_check
- Don't check the IO-APIC timer. This can work around
- problems with incorrect timer initialization on some boards.
-
- apicpmtimer
- Do APIC timer calibration using the pmtimer. Implies
- apicmaintimer. Useful when your PIT timer is totally broken.
-
-Timing
-======
-
- notsc
- Deprecated, use tsc=unstable instead.
-
- nohpet
- Don't use the HPET timer.
-
-Idle loop
-=========
-
- idle=poll
- Don't do power saving in the idle loop using HLT, but poll for rescheduling
- event. This will make the CPUs eat a lot more power, but may be useful
- to get slightly better performance in multiprocessor benchmarks. It also
- makes some profiling using performance counters more accurate.
- Please note that on systems with MONITOR/MWAIT support (like Intel EM64T
- CPUs) this option has no performance advantage over the normal idle loop.
- It may also interact badly with hyperthreading.
-
-Rebooting
-=========
-
- reboot=b[ios] | t[riple] | k[bd] | a[cpi] | e[fi] | p[ci] [, [w]arm | [c]old]
- bios
- Use the CPU reboot vector for warm reset
- warm
- Don't set the cold reboot flag
- cold
- Set the cold reboot flag
- triple
- Force a triple fault (init)
- kbd
- Use the keyboard controller. cold reset (default)
- acpi
- Use the ACPI RESET_REG in the FADT. If ACPI is not configured or
- the ACPI reset does not work, the reboot path attempts the reset
- using the keyboard controller.
- efi
- Use efi reset_system runtime service. If EFI is not configured or
- the EFI reset does not work, the reboot path attempts the reset using
- the keyboard controller.
- pci
- Use a write to the PCI config space register 0xcf9 to trigger reboot.
-
- Using warm reset will be much faster especially on big memory
- systems because the BIOS will not go through the memory check.
- Disadvantage is that not all hardware will be completely reinitialized
- on reboot so there may be boot problems on some systems.
-
- reboot=force
- Don't stop other CPUs on reboot. This can make reboot more reliable
- in some cases.
-
- reboot=default
- There are some built-in platform specific "quirks" - you may see:
- "reboot: <name> series board detected. Selecting <type> for reboots."
- In the case where you think the quirk is in error (e.g. you have
- newer BIOS, or newer board) using this option will ignore the built-in
- quirk table, and use the generic default reboot actions.
-
-NUMA
-====
-
- numa=off
- Only set up a single NUMA node spanning all memory.
-
- numa=noacpi
- Don't parse the SRAT table for NUMA setup
-
- numa=nohmat
- Don't parse the HMAT table for NUMA setup, or soft-reserved memory
- partitioning.
-
- numa=fake=<size>[MG]
- If given as a memory unit, fills all system RAM with nodes of
- size interleaved over physical nodes.
-
- numa=fake=<N>
- If given as an integer, fills all system RAM with N fake nodes
- interleaved over physical nodes.
-
- numa=fake=<N>U
- If given as an integer followed by 'U', it will divide each
- physical node into N emulated nodes.
-
-ACPI
-====
-
- acpi=off
- Don't enable ACPI
- acpi=ht
- Use ACPI boot table parsing, but don't enable ACPI interpreter
- acpi=force
- Force ACPI on (currently not needed)
- acpi=strict
- Disable out of spec ACPI workarounds.
- acpi_sci={edge,level,high,low}
- Set up ACPI SCI interrupt.
- acpi=noirq
- Don't route interrupts
- acpi=nocmcff
- Disable firmware first mode for corrected errors. This
- disables parsing the HEST CMC error source to check if
- firmware has set the FF flag. This may result in
- duplicate corrected error reports.
-
-PCI
-===
-
- pci=off
- Don't use PCI
- pci=conf1
- Use conf1 access.
- pci=conf2
- Use conf2 access.
- pci=rom
- Assign ROMs.
- pci=assign-busses
- Assign busses
- pci=irqmask=MASK
- Set PCI interrupt mask to MASK
- pci=lastbus=NUMBER
- Scan up to NUMBER busses, no matter what the mptable says.
- pci=noacpi
- Don't use ACPI to set up PCI interrupt routing.
-
-IOMMU (input/output memory management unit)
-===========================================
-Multiple x86-64 PCI-DMA mapping implementations exist, for example:
-
- 1. <kernel/dma/direct.c>: use no hardware/software IOMMU at all
- (e.g. because you have < 3 GB memory).
- Kernel boot message: "PCI-DMA: Disabling IOMMU"
-
- 2. <arch/x86/kernel/amd_gart_64.c>: AMD GART based hardware IOMMU.
- Kernel boot message: "PCI-DMA: using GART IOMMU"
-
- 3. <arch/x86_64/kernel/pci-swiotlb.c> : Software IOMMU implementation. Used
- e.g. if there is no hardware IOMMU in the system and it is need because
- you have >3GB memory or told the kernel to us it (iommu=soft))
- Kernel boot message: "PCI-DMA: Using software bounce buffering
- for IO (SWIOTLB)"
-
-::
-
- iommu=[<size>][,noagp][,off][,force][,noforce]
- [,memaper[=<order>]][,merge][,fullflush][,nomerge]
- [,noaperture]
-
-General iommu options:
-
- off
- Don't initialize and use any kind of IOMMU.
- noforce
- Don't force hardware IOMMU usage when it is not needed. (default).
- force
- Force the use of the hardware IOMMU even when it is
- not actually needed (e.g. because < 3 GB memory).
- soft
- Use software bounce buffering (SWIOTLB) (default for
- Intel machines). This can be used to prevent the usage
- of an available hardware IOMMU.
-
-iommu options only relevant to the AMD GART hardware IOMMU:
-
- <size>
- Set the size of the remapping area in bytes.
- allowed
- Overwrite iommu off workarounds for specific chipsets.
- fullflush
- Flush IOMMU on each allocation (default).
- nofullflush
- Don't use IOMMU fullflush.
- memaper[=<order>]
- Allocate an own aperture over RAM with size 32MB<<order.
- (default: order=1, i.e. 64MB)
- merge
- Do scatter-gather (SG) merging. Implies "force" (experimental).
- nomerge
- Don't do scatter-gather (SG) merging.
- noaperture
- Ask the IOMMU not to touch the aperture for AGP.
- noagp
- Don't initialize the AGP driver and use full aperture.
- panic
- Always panic when IOMMU overflows.
-
-iommu options only relevant to the software bounce buffering (SWIOTLB) IOMMU
-implementation:
-
- swiotlb=<slots>[,force,noforce]
- <slots>
- Prereserve that many 2K slots for the software IO bounce buffering.
- force
- Force all IO through the software TLB.
- noforce
- Do not initialize the software TLB.
-
-
-Miscellaneous
-=============
-
- nogbpages
- Do not use GB pages for kernel direct mappings.
- gbpages
- Use GB pages for kernel direct mappings.
-
-
-AMD SEV (Secure Encrypted Virtualization)
-=========================================
-Options relating to AMD SEV, specified via the following format:
-
-::
-
- sev=option1[,option2]
-
-The available options are:
-
- debug
- Enable debug messages.
diff --git a/Documentation/arch/x86/x86_64/fake-numa-for-cpusets.rst b/Documentation/arch/x86/x86_64/fake-numa-for-cpusets.rst
index ba74617d4999..970ee94eb551 100644
--- a/Documentation/arch/x86/x86_64/fake-numa-for-cpusets.rst
+++ b/Documentation/arch/x86/x86_64/fake-numa-for-cpusets.rst
@@ -18,7 +18,7 @@ For more information on the features of cpusets, see
Documentation/admin-guide/cgroup-v1/cpusets.rst.
There are a number of different configurations you can use for your needs. For
more information on the numa=fake command line option and its various ways of
-configuring fake nodes, see Documentation/arch/x86/x86_64/boot-options.rst.
+configuring fake nodes, see Documentation/admin-guide/kernel-parameters.txt
For the purposes of this introduction, we'll assume a very primitive NUMA
emulation setup of "numa=fake=4*512,". This will split our system memory into
diff --git a/Documentation/arch/x86/x86_64/fred.rst b/Documentation/arch/x86/x86_64/fred.rst
new file mode 100644
index 000000000000..9f57e7b91f7e
--- /dev/null
+++ b/Documentation/arch/x86/x86_64/fred.rst
@@ -0,0 +1,96 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=========================================
+Flexible Return and Event Delivery (FRED)
+=========================================
+
+Overview
+========
+
+The FRED architecture defines simple new transitions that change
+privilege level (ring transitions). The FRED architecture was
+designed with the following goals:
+
+1) Improve overall performance and response time by replacing event
+ delivery through the interrupt descriptor table (IDT event
+ delivery) and event return by the IRET instruction with lower
+ latency transitions.
+
+2) Improve software robustness by ensuring that event delivery
+ establishes the full supervisor context and that event return
+ establishes the full user context.
+
+The new transitions defined by the FRED architecture are FRED event
+delivery and, for returning from events, two FRED return instructions.
+FRED event delivery can effect a transition from ring 3 to ring 0, but
+it is used also to deliver events incident to ring 0. One FRED
+instruction (ERETU) effects a return from ring 0 to ring 3, while the
+other (ERETS) returns while remaining in ring 0. Collectively, FRED
+event delivery and the FRED return instructions are FRED transitions.
+
+In addition to these transitions, the FRED architecture defines a new
+instruction (LKGS) for managing the state of the GS segment register.
+The LKGS instruction can be used by 64-bit operating systems that do
+not use the new FRED transitions.
+
+Furthermore, the FRED architecture is easy to extend for future CPU
+architectures.
+
+Software based event dispatching
+================================
+
+FRED operates differently from IDT in terms of event handling. Instead
+of directly dispatching an event to its handler based on the event
+vector, FRED requires the software to dispatch an event to its handler
+based on both the event's type and vector. Therefore, an event dispatch
+framework must be implemented to facilitate the event-to-handler
+dispatch process. The FRED event dispatch framework takes control
+once an event is delivered, and employs a two-level dispatch.
+
+The first level dispatching is event type based, and the second level
+dispatching is event vector based.
+
+Full supervisor/user context
+============================
+
+FRED event delivery atomically save and restore full supervisor/user
+context upon event delivery and return. Thus it avoids the problem of
+transient states due to %cr2 and/or %dr6, and it is no longer needed
+to handle all the ugly corner cases caused by half baked entry states.
+
+FRED allows explicit unblock of NMI with new event return instructions
+ERETS/ERETU, avoiding the mess caused by IRET which unconditionally
+unblocks NMI, e.g., when an exception happens during NMI handling.
+
+FRED always restores the full value of %rsp, thus ESPFIX is no longer
+needed when FRED is enabled.
+
+LKGS
+====
+
+LKGS behaves like the MOV to GS instruction except that it loads the
+base address into the IA32_KERNEL_GS_BASE MSR instead of the GS
+segment’s descriptor cache. With LKGS, it ends up with avoiding
+mucking with kernel GS, i.e., an operating system can always operate
+with its own GS base address.
+
+Because FRED event delivery from ring 3 and ERETU both swap the value
+of the GS base address and that of the IA32_KERNEL_GS_BASE MSR, plus
+the introduction of LKGS instruction, the SWAPGS instruction is no
+longer needed when FRED is enabled, thus is disallowed (#UD).
+
+Stack levels
+============
+
+4 stack levels 0~3 are introduced to replace the nonreentrant IST for
+event handling, and each stack level should be configured to use a
+dedicated stack.
+
+The current stack level could be unchanged or go higher upon FRED
+event delivery. If unchanged, the CPU keeps using the current event
+stack. If higher, the CPU switches to a new event stack specified by
+the MSR of the new stack level, i.e., MSR_IA32_FRED_RSP[123].
+
+Only execution of a FRED return instruction ERET[US], could lower the
+current stack level, causing the CPU to switch back to the stack it was
+on before a previous event delivery that promoted the stack level.
diff --git a/Documentation/arch/x86/x86_64/fsgs.rst b/Documentation/arch/x86/x86_64/fsgs.rst
index 50960e09e1f6..6bda4d16d3f7 100644
--- a/Documentation/arch/x86/x86_64/fsgs.rst
+++ b/Documentation/arch/x86/x86_64/fsgs.rst
@@ -125,17 +125,17 @@ FSGSBASE instructions enablement
FSGSBASE instructions compiler support
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-GCC version 4.6.4 and newer provide instrinsics for the FSGSBASE
+GCC version 4.6.4 and newer provide intrinsics for the FSGSBASE
instructions. Clang 5 supports them as well.
=================== ===========================
_readfsbase_u64() Read the FS base register
- _readfsbase_u64() Read the GS base register
+ _readgsbase_u64() Read the GS base register
_writefsbase_u64() Write the FS base register
_writegsbase_u64() Write the GS base register
=================== ===========================
-To utilize these instrinsics <immintrin.h> must be included in the source
+To utilize these intrinsics <immintrin.h> must be included in the source
code and the compiler option -mfsgsbase has to be added.
Compiler support for FS/GS based addressing
diff --git a/Documentation/arch/x86/x86_64/index.rst b/Documentation/arch/x86/x86_64/index.rst
index a56070fc8e77..a0261957a08a 100644
--- a/Documentation/arch/x86/x86_64/index.rst
+++ b/Documentation/arch/x86/x86_64/index.rst
@@ -7,7 +7,6 @@ x86_64 Support
.. toctree::
:maxdepth: 2
- boot-options
uefi
mm
5level-paging
@@ -15,3 +14,4 @@ x86_64 Support
cpu-hotplug-spec
machinecheck
fsgs
+ fred
diff --git a/Documentation/arch/x86/x86_64/mm.rst b/Documentation/arch/x86/x86_64/mm.rst
index 35e5e18c83d0..a6cf05d51bd8 100644
--- a/Documentation/arch/x86/x86_64/mm.rst
+++ b/Documentation/arch/x86/x86_64/mm.rst
@@ -29,15 +29,27 @@ Complete virtual memory map with 4-level page tables
Start addr | Offset | End addr | Size | VM area description
========================================================================================================================
| | | |
- 0000000000000000 | 0 | 00007fffffffffff | 128 TB | user-space virtual memory, different per mm
+ 0000000000000000 | 0 | 00007fffffffefff | ~128 TB | user-space virtual memory, different per mm
+ 00007ffffffff000 | ~128 TB | 00007fffffffffff | 4 kB | ... guard hole
__________________|____________|__________________|_________|___________________________________________________________
| | | |
- 0000800000000000 | +128 TB | ffff7fffffffffff | ~16M TB | ... huge, almost 64 bits wide hole of non-canonical
- | | | | virtual memory addresses up to the -128 TB
+ 0000800000000000 | +128 TB | 7fffffffffffffff | ~8 EB | ... huge, almost 63 bits wide hole of non-canonical
+ | | | | virtual memory addresses up to the -8 EB
| | | | starting offset of kernel mappings.
+ | | | |
+ | | | | LAM relaxes canonicallity check allowing to create aliases
+ | | | | for userspace memory here.
__________________|____________|__________________|_________|___________________________________________________________
|
| Kernel-space virtual memory, shared between all processes:
+ __________________|____________|__________________|_________|___________________________________________________________
+ | | | |
+ 8000000000000000 | -8 EB | ffff7fffffffffff | ~8 EB | ... huge, almost 63 bits wide hole of non-canonical
+ | | | | virtual memory addresses up to the -128 TB
+ | | | | starting offset of kernel mappings.
+ | | | |
+ | | | | LAM_SUP relaxes canonicallity check allowing to create
+ | | | | aliases for kernel memory here.
____________________________________________________________|___________________________________________________________
| | | |
ffff800000000000 | -128 TB | ffff87ffffffffff | 8 TB | ... guard hole, also reserved for hypervisor
@@ -88,16 +100,27 @@ Complete virtual memory map with 5-level page tables
Start addr | Offset | End addr | Size | VM area description
========================================================================================================================
| | | |
- 0000000000000000 | 0 | 00ffffffffffffff | 64 PB | user-space virtual memory, different per mm
+ 0000000000000000 | 0 | 00fffffffffff000 | ~64 PB | user-space virtual memory, different per mm
+ 00fffffffffff000 | ~64 PB | 00ffffffffffffff | 4 kB | ... guard hole
__________________|____________|__________________|_________|___________________________________________________________
| | | |
- 0100000000000000 | +64 PB | feffffffffffffff | ~16K PB | ... huge, still almost 64 bits wide hole of non-canonical
- | | | | virtual memory addresses up to the -64 PB
+ 0100000000000000 | +64 PB | 7fffffffffffffff | ~8 EB | ... huge, almost 63 bits wide hole of non-canonical
+ | | | | virtual memory addresses up to the -8EB TB
| | | | starting offset of kernel mappings.
+ | | | |
+ | | | | LAM relaxes canonicallity check allowing to create aliases
+ | | | | for userspace memory here.
__________________|____________|__________________|_________|___________________________________________________________
|
| Kernel-space virtual memory, shared between all processes:
____________________________________________________________|___________________________________________________________
+ 8000000000000000 | -8 EB | feffffffffffffff | ~8 EB | ... huge, almost 63 bits wide hole of non-canonical
+ | | | | virtual memory addresses up to the -64 PB
+ | | | | starting offset of kernel mappings.
+ | | | |
+ | | | | LAM_SUP relaxes canonicallity check allowing to create
+ | | | | aliases for kernel memory here.
+ ____________________________________________________________|___________________________________________________________
| | | |
ff00000000000000 | -64 PB | ff0fffffffffffff | 4 PB | ... guard hole, also reserved for hypervisor
ff10000000000000 | -60 PB | ff10ffffffffffff | 0.25 PB | LDT remap for PTI
@@ -153,5 +176,5 @@ Be very careful vs. KASLR when changing anything here. The KASLR address
range must not overlap with anything except the KASAN shadow area, which is
correct as KASAN disables KASLR.
-For both 4- and 5-level layouts, the STACKLEAK_POISON value in the last 2MB
+For both 4- and 5-level layouts, the KSTACK_ERASE_POISON value in the last 2MB
hole: ffffffffffff4111
diff --git a/Documentation/arch/x86/x86_64/uefi.rst b/Documentation/arch/x86/x86_64/uefi.rst
index fbc30c9a071d..e84592dbd6c1 100644
--- a/Documentation/arch/x86/x86_64/uefi.rst
+++ b/Documentation/arch/x86/x86_64/uefi.rst
@@ -12,14 +12,20 @@ with EFI firmware and specifications are listed below.
1. UEFI specification: http://www.uefi.org
-2. Booting Linux kernel on UEFI x86_64 platform requires bootloader
- support. Elilo with x86_64 support can be used.
+2. Booting Linux kernel on UEFI x86_64 platform can either be
+ done using the <Documentation/admin-guide/efi-stub.rst> or using a
+ separate bootloader.
3. x86_64 platform with EFI/UEFI firmware.
Mechanics
---------
+Refer to <Documentation/admin-guide/efi-stub.rst> to learn how to use the EFI stub.
+
+Below are general EFI setup guidelines on the x86_64 platform,
+regardless of whether you use the EFI stub or a separate bootloader.
+
- Build the kernel with the following configuration::
CONFIG_FB_EFI=y
@@ -31,16 +37,27 @@ Mechanics
CONFIG_EFI=y
CONFIG_EFIVAR_FS=y or m # optional
-- Create a VFAT partition on the disk
-- Copy the following to the VFAT partition:
+- Create a VFAT partition on the disk with the EFI System flag
+ You can do this with fdisk with the following commands:
+
+ 1. g - initialize a GPT partition table
+ 2. n - create a new partition
+ 3. t - change the partition type to "EFI System" (number 1)
+ 4. w - write and save the changes
+
+ Afterwards, initialize the VFAT filesystem by running mkfs::
+
+ mkfs.fat /dev/<your-partition>
+
+- Copy the boot files to the VFAT partition:
+ If you use the EFI stub method, the kernel acts also as an EFI executable.
+
+ You can just copy the bzImage to the EFI/boot/bootx64.efi path on the partition
+ so that it will automatically get booted, see the <Documentation/admin-guide/efi-stub.rst> page
+ for additional instructions regarding passage of kernel parameters and initramfs.
- elilo bootloader with x86_64 support, elilo configuration file,
- kernel image built in first step and corresponding
- initrd. Instructions on building elilo and its dependencies
- can be found in the elilo sourceforge project.
+ If you use a custom bootloader, refer to the relevant documentation for help on this part.
-- Boot to EFI shell and invoke elilo choosing the kernel image built
- in first step.
- If some or all EFI runtime services don't work, you can try following
kernel command line parameters to turn off some or all EFI runtime
services.
diff --git a/Documentation/arch/x86/xstate.rst b/Documentation/arch/x86/xstate.rst
index ae5c69e48b11..cec05ac464c1 100644
--- a/Documentation/arch/x86/xstate.rst
+++ b/Documentation/arch/x86/xstate.rst
@@ -138,7 +138,7 @@ Note this example does not include the sigaltstack preparation.
Dynamic features in signal frames
---------------------------------
-Dynamcally enabled features are not written to the signal frame upon signal
+Dynamically enabled features are not written to the signal frame upon signal
entry if the feature is in its initial configuration. This differs from
non-dynamic features which are always written regardless of their
configuration. Signal handlers can examine the XSAVE buffer's XSTATE_BV
diff --git a/Documentation/atomic_t.txt b/Documentation/atomic_t.txt
index d7adc6d543db..bee3b1bca9a7 100644
--- a/Documentation/atomic_t.txt
+++ b/Documentation/atomic_t.txt
@@ -171,14 +171,14 @@ The rule of thumb:
- RMW operations that are conditional are unordered on FAILURE,
otherwise the above rules apply.
-Except of course when an operation has an explicit ordering like:
+Except of course when a successful operation has an explicit ordering like:
{}_relaxed: unordered
{}_acquire: the R of the RMW (or atomic_read) is an ACQUIRE
{}_release: the W of the RMW (or atomic_set) is a RELEASE
Where 'unordered' is against other memory locations. Address dependencies are
-not defeated.
+not defeated. Conditional operations are still unordered on FAILURE.
Fully ordered primitives are ordered against everything prior and everything
subsequent. Therefore a fully ordered primitive is like having an smp_mb()
diff --git a/Documentation/block/bfq-iosched.rst b/Documentation/block/bfq-iosched.rst
index df3a8a47f58c..a0ff0eb11e7f 100644
--- a/Documentation/block/bfq-iosched.rst
+++ b/Documentation/block/bfq-iosched.rst
@@ -9,7 +9,7 @@ controllers), BFQ's main features are:
- BFQ guarantees a high system and application responsiveness, and a
low latency for time-sensitive applications, such as audio or video
players;
-- BFQ distributes bandwidth, and not just time, among processes or
+- BFQ distributes bandwidth, not just time, among processes or
groups (switching back to time distribution when needed to keep
throughput high).
@@ -111,7 +111,7 @@ Higher speed for code-development tasks
If some additional workload happens to be executed in parallel, then
BFQ executes the I/O-related components of typical code-development
-tasks (compilation, checkout, merge, ...) much more quickly than CFQ,
+tasks (compilation, checkout, merge, etc.) much more quickly than CFQ,
NOOP or DEADLINE.
High throughput
@@ -127,9 +127,9 @@ Strong fairness, bandwidth and delay guarantees
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
BFQ distributes the device throughput, and not just the device time,
-among I/O-bound applications in proportion their weights, with any
+among I/O-bound applications in proportion to their weights, with any
workload and regardless of the device parameters. From these bandwidth
-guarantees, it is possible to compute tight per-I/O-request delay
+guarantees, it is possible to compute a tight per-I/O-request delay
guarantees by a simple formula. If not configured for strict service
guarantees, BFQ switches to time-based resource sharing (only) for
applications that would otherwise cause a throughput loss.
@@ -199,7 +199,7 @@ plus a lot of code, are borrowed from CFQ.
- On flash-based storage with internal queueing of commands
(typically NCQ), device idling happens to be always detrimental
- for throughput. So, with these devices, BFQ performs idling
+ to throughput. So, with these devices, BFQ performs idling
only when strictly needed for service guarantees, i.e., for
guaranteeing low latency or fairness. In these cases, overall
throughput may be sub-optimal. No solution currently exists to
@@ -212,7 +212,7 @@ plus a lot of code, are borrowed from CFQ.
and to reduce their latency. The most important action taken to
achieve this goal is to give to the queues associated with these
applications more than their fair share of the device
- throughput. For brevity, we call just "weight-raising" the whole
+ throughput. For brevity, we call it just "weight-raising" the whole
sets of actions taken by BFQ to privilege these queues. In
particular, BFQ provides a milder form of weight-raising for
interactive applications, and a stronger form for soft real-time
@@ -231,7 +231,7 @@ plus a lot of code, are borrowed from CFQ.
responsive in detecting interleaved I/O (cooperating processes),
that it enables BFQ to achieve a high throughput, by queue
merging, even for queues for which CFQ needs a different
- mechanism, preemption, to get a high throughput. As such EQM is a
+ mechanism, preemption, to get a high throughput. As such, EQM is a
unified mechanism to achieve a high throughput with interleaved
I/O.
@@ -254,7 +254,7 @@ plus a lot of code, are borrowed from CFQ.
- First, with any proportional-share scheduler, the maximum
deviation with respect to an ideal service is proportional to
the maximum budget (slice) assigned to queues. As a consequence,
- BFQ can keep this deviation tight not only because of the
+ BFQ can keep this deviation tight, not only because of the
accurate service of B-WF2Q+, but also because BFQ *does not*
need to assign a larger budget to a queue to let the queue
receive a higher fraction of the device throughput.
@@ -327,7 +327,7 @@ applications. Unset this tunable if you need/want to control weights.
slice_idle
----------
-This parameter specifies how long BFQ should idle for next I/O
+This parameter specifies how long BFQ should idle for the next I/O
request, when certain sync BFQ queues become empty. By default
slice_idle is a non-zero value. Idling has a double purpose: boosting
throughput and making sure that the desired throughput distribution is
@@ -365,7 +365,7 @@ terms of I/O-request dispatches. To guarantee that the actual service
order then corresponds to the dispatch order, the strict_guarantees
tunable must be set too.
-There is an important flipside for idling: apart from the above cases
+There is an important flip side to idling: apart from the above cases
where it is beneficial also for throughput, idling can severely impact
throughput. One important case is random workload. Because of this
issue, BFQ tends to avoid idling as much as possible, when it is not
@@ -475,7 +475,7 @@ max_budget
Maximum amount of service, measured in sectors, that can be provided
to a BFQ queue once it is set in service (of course within the limits
-of the above timeout). According to what said in the description of
+of the above timeout). According to what was said in the description of
the algorithm, larger values increase the throughput in proportion to
the percentage of sequential I/O requests issued. The price of larger
values is that they coarsen the granularity of short-term bandwidth
diff --git a/Documentation/block/cmdline-partition.rst b/Documentation/block/cmdline-partition.rst
index 530bedff548a..526ba201dddc 100644
--- a/Documentation/block/cmdline-partition.rst
+++ b/Documentation/block/cmdline-partition.rst
@@ -39,13 +39,16 @@ blkdevparts=<blkdev-def>[;<blkdev-def>]
create a link to block device partition with the name "PARTNAME".
User space application can access partition by partition name.
+ro
+ read-only. Flag the partition as read-only.
+
Example:
eMMC disk names are "mmcblk0" and "mmcblk0boot0".
bootargs::
- 'blkdevparts=mmcblk0:1G(data0),1G(data1),-;mmcblk0boot0:1m(boot),-(kernel)'
+ 'blkdevparts=mmcblk0:1G(data0),1G(data1),-;mmcblk0boot0:1m(boot)ro,-(kernel)'
dmesg::
diff --git a/Documentation/block/data-integrity.rst b/Documentation/block/data-integrity.rst
index 6a760c0eb192..99905e880a0e 100644
--- a/Documentation/block/data-integrity.rst
+++ b/Documentation/block/data-integrity.rst
@@ -153,18 +153,11 @@ bio_free() will automatically free the bip.
4.2 Block Device
----------------
-Because the format of the protection data is tied to the physical
-disk, each block device has been extended with a block integrity
-profile (struct blk_integrity). This optional profile is registered
-with the block layer using blk_integrity_register().
-
-The profile contains callback functions for generating and verifying
-the protection data, as well as getting and setting application tags.
-The profile also contains a few constants to aid in completing,
-merging and splitting the integrity metadata.
+Block devices can set up the integrity information in the integrity
+sub-struture of the queue_limits structure.
Layered block devices will need to pick a profile that's appropriate
-for all subdevices. blk_integrity_compare() can help with that. DM
+for all subdevices. queue_limits_stack_integrity() can help with that. DM
and MD linear, RAID0 and RAID1 are currently supported. RAID4/5/6
will require extra work due to the application tag.
@@ -250,42 +243,6 @@ will require extra work due to the application tag.
integrity upon completion.
-5.4 Registering A Block Device As Capable Of Exchanging Integrity Metadata
---------------------------------------------------------------------------
-
- To enable integrity exchange on a block device the gendisk must be
- registered as capable:
-
- `int blk_integrity_register(gendisk, blk_integrity);`
-
- The blk_integrity struct is a template and should contain the
- following::
-
- static struct blk_integrity my_profile = {
- .name = "STANDARDSBODY-TYPE-VARIANT-CSUM",
- .generate_fn = my_generate_fn,
- .verify_fn = my_verify_fn,
- .tuple_size = sizeof(struct my_tuple_size),
- .tag_size = <tag bytes per hw sector>,
- };
-
- 'name' is a text string which will be visible in sysfs. This is
- part of the userland API so chose it carefully and never change
- it. The format is standards body-type-variant.
- E.g. T10-DIF-TYPE1-IP or T13-EPP-0-CRC.
-
- 'generate_fn' generates appropriate integrity metadata (for WRITE).
-
- 'verify_fn' verifies that the data buffer matches the integrity
- metadata.
-
- 'tuple_size' must be set to match the size of the integrity
- metadata per sector. I.e. 8 for DIF and EPP.
-
- 'tag_size' must be set to identify how many bytes of tag space
- are available per hardware sector. For DIF this is either 2 or
- 0 depending on the value of the Control Mode Page ATO bit.
-
----------------------------------------------------------------------
2007-12-24 Martin K. Petersen <martin.petersen@oracle.com>
diff --git a/Documentation/block/inline-encryption.rst b/Documentation/block/inline-encryption.rst
index 90b733422ed4..6380e6ab492b 100644
--- a/Documentation/block/inline-encryption.rst
+++ b/Documentation/block/inline-encryption.rst
@@ -77,10 +77,10 @@ Basic design
============
We introduce ``struct blk_crypto_key`` to represent an inline encryption key and
-how it will be used. This includes the actual bytes of the key; the size of the
-key; the algorithm and data unit size the key will be used with; and the number
-of bytes needed to represent the maximum data unit number the key will be used
-with.
+how it will be used. This includes the type of the key (raw or
+hardware-wrapped); the actual bytes of the key; the size of the key; the
+algorithm and data unit size the key will be used with; and the number of bytes
+needed to represent the maximum data unit number the key will be used with.
We introduce ``struct bio_crypt_ctx`` to represent an encryption context. It
contains a data unit number and a pointer to a blk_crypto_key. We add pointers
@@ -301,3 +301,250 @@ kernel will pretend that the device does not support hardware inline encryption
When the crypto API fallback is enabled, this means that all bios with and
encryption context will use the fallback, and IO will complete as usual. When
the fallback is disabled, a bio with an encryption context will be failed.
+
+.. _hardware_wrapped_keys:
+
+Hardware-wrapped keys
+=====================
+
+Motivation and threat model
+---------------------------
+
+Linux storage encryption (dm-crypt, fscrypt, eCryptfs, etc.) traditionally
+relies on the raw encryption key(s) being present in kernel memory so that the
+encryption can be performed. This traditionally isn't seen as a problem because
+the key(s) won't be present during an offline attack, which is the main type of
+attack that storage encryption is intended to protect from.
+
+However, there is an increasing desire to also protect users' data from other
+types of attacks (to the extent possible), including:
+
+- Cold boot attacks, where an attacker with physical access to a system suddenly
+ powers it off, then immediately dumps the system memory to extract recently
+ in-use encryption keys, then uses these keys to decrypt user data on-disk.
+
+- Online attacks where the attacker is able to read kernel memory without fully
+ compromising the system, followed by an offline attack where any extracted
+ keys can be used to decrypt user data on-disk. An example of such an online
+ attack would be if the attacker is able to run some code on the system that
+ exploits a Meltdown-like vulnerability but is unable to escalate privileges.
+
+- Online attacks where the attacker fully compromises the system, but their data
+ exfiltration is significantly time-limited and/or bandwidth-limited, so in
+ order to completely exfiltrate the data they need to extract the encryption
+ keys to use in a later offline attack.
+
+Hardware-wrapped keys are a feature of inline encryption hardware that is
+designed to protect users' data from the above attacks (to the extent possible),
+without introducing limitations such as a maximum number of keys.
+
+Note that it is impossible to **fully** protect users' data from these attacks.
+Even in the attacks where the attacker "just" gets read access to kernel memory,
+they can still extract any user data that is present in memory, including
+plaintext pagecache pages of encrypted files. The focus here is just on
+protecting the encryption keys, as those instantly give access to **all** user
+data in any following offline attack, rather than just some of it (where which
+data is included in that "some" might not be controlled by the attacker).
+
+Solution overview
+-----------------
+
+Inline encryption hardware typically has "keyslots" into which software can
+program keys for the hardware to use; the contents of keyslots typically can't
+be read back by software. As such, the above security goals could be achieved
+if the kernel simply erased its copy of the key(s) after programming them into
+keyslot(s) and thereafter only referred to them via keyslot number.
+
+However, that naive approach runs into a couple problems:
+
+- It limits the number of unlocked keys to the number of keyslots, which
+ typically is a small number. In cases where there is only one encryption key
+ system-wide (e.g., a full-disk encryption key), that can be tolerable.
+ However, in general there can be many logged-in users with many different
+ keys, and/or many running applications with application-specific encrypted
+ storage areas. This is especially true if file-based encryption (e.g.
+ fscrypt) is being used.
+
+- Inline crypto engines typically lose the contents of their keyslots if the
+ storage controller (usually UFS or eMMC) is reset. Resetting the storage
+ controller is a standard error recovery procedure that is executed if certain
+ types of storage errors occur, and such errors can occur at any time.
+ Therefore, when inline crypto is being used, the operating system must always
+ be ready to reprogram the keyslots without user intervention.
+
+Thus, it is important for the kernel to still have a way to "remind" the
+hardware about a key, without actually having the raw key itself.
+
+Somewhat less importantly, it is also desirable that the raw keys are never
+visible to software at all, even while being initially unlocked. This would
+ensure that a read-only compromise of system memory will never allow a key to be
+extracted to be used off-system, even if it occurs when a key is being unlocked.
+
+To solve all these problems, some vendors of inline encryption hardware have
+made their hardware support *hardware-wrapped keys*. Hardware-wrapped keys
+are encrypted keys that can only be unwrapped (decrypted) and used by hardware
+-- either by the inline encryption hardware itself, or by a dedicated hardware
+block that can directly provision keys to the inline encryption hardware.
+
+(We refer to them as "hardware-wrapped keys" rather than simply "wrapped keys"
+to add some clarity in cases where there could be other types of wrapped keys,
+such as in file-based encryption. Key wrapping is a commonly used technique.)
+
+The key which wraps (encrypts) hardware-wrapped keys is a hardware-internal key
+that is never exposed to software; it is either a persistent key (a "long-term
+wrapping key") or a per-boot key (an "ephemeral wrapping key"). The long-term
+wrapped form of the key is what is initially unlocked, but it is erased from
+memory as soon as it is converted into an ephemerally-wrapped key. In-use
+hardware-wrapped keys are always ephemerally-wrapped, not long-term wrapped.
+
+As inline encryption hardware can only be used to encrypt/decrypt data on-disk,
+the hardware also includes a level of indirection; it doesn't use the unwrapped
+key directly for inline encryption, but rather derives both an inline encryption
+key and a "software secret" from it. Software can use the "software secret" for
+tasks that can't use the inline encryption hardware, such as filenames
+encryption. The software secret is not protected from memory compromise.
+
+Key hierarchy
+-------------
+
+Here is the key hierarchy for a hardware-wrapped key::
+
+ Hardware-wrapped key
+ |
+ |
+ <Hardware KDF>
+ |
+ -----------------------------
+ | |
+ Inline encryption key Software secret
+
+The components are:
+
+- *Hardware-wrapped key*: a key for the hardware's KDF (Key Derivation
+ Function), in ephemerally-wrapped form. The key wrapping algorithm is a
+ hardware implementation detail that doesn't impact kernel operation, but a
+ strong authenticated encryption algorithm such as AES-256-GCM is recommended.
+
+- *Hardware KDF*: a KDF (Key Derivation Function) which the hardware uses to
+ derive subkeys after unwrapping the wrapped key. The hardware's choice of KDF
+ doesn't impact kernel operation, but it does need to be known for testing
+ purposes, and it's also assumed to have at least a 256-bit security strength.
+ All known hardware uses the SP800-108 KDF in Counter Mode with AES-256-CMAC,
+ with a particular choice of labels and contexts; new hardware should use this
+ already-vetted KDF.
+
+- *Inline encryption key*: a derived key which the hardware directly provisions
+ to a keyslot of the inline encryption hardware, without exposing it to
+ software. In all known hardware, this will always be an AES-256-XTS key.
+ However, in principle other encryption algorithms could be supported too.
+ Hardware must derive distinct subkeys for each supported encryption algorithm.
+
+- *Software secret*: a derived key which the hardware returns to software so
+ that software can use it for cryptographic tasks that can't use inline
+ encryption. This value is cryptographically isolated from the inline
+ encryption key, i.e. knowing one doesn't reveal the other. (The KDF ensures
+ this.) Currently, the software secret is always 32 bytes and thus is suitable
+ for cryptographic applications that require up to a 256-bit security strength.
+ Some use cases (e.g. full-disk encryption) won't require the software secret.
+
+Example: in the case of fscrypt, the fscrypt master key (the key that protects a
+particular set of encrypted directories) is made hardware-wrapped. The inline
+encryption key is used as the file contents encryption key, while the software
+secret (rather than the master key directly) is used to key fscrypt's KDF
+(HKDF-SHA512) to derive other subkeys such as filenames encryption keys.
+
+Note that currently this design assumes a single inline encryption key per
+hardware-wrapped key, without any further key derivation. Thus, in the case of
+fscrypt, currently hardware-wrapped keys are only compatible with the "inline
+encryption optimized" settings, which use one file contents encryption key per
+encryption policy rather than one per file. This design could be extended to
+make the hardware derive per-file keys using per-file nonces passed down the
+storage stack, and in fact some hardware already supports this; future work is
+planned to remove this limitation by adding the corresponding kernel support.
+
+Kernel support
+--------------
+
+The inline encryption support of the kernel's block layer ("blk-crypto") has
+been extended to support hardware-wrapped keys as an alternative to raw keys,
+when hardware support is available. This works in the following way:
+
+- A ``key_types_supported`` field is added to the crypto capabilities in
+ ``struct blk_crypto_profile``. This allows device drivers to declare that
+ they support raw keys, hardware-wrapped keys, or both.
+
+- ``struct blk_crypto_key`` can now contain a hardware-wrapped key as an
+ alternative to a raw key; a ``key_type`` field is added to
+ ``struct blk_crypto_config`` to distinguish between the different key types.
+ This allows users of blk-crypto to en/decrypt data using a hardware-wrapped
+ key in a way very similar to using a raw key.
+
+- A new method ``blk_crypto_ll_ops::derive_sw_secret`` is added. Device drivers
+ that support hardware-wrapped keys must implement this method. Users of
+ blk-crypto can call ``blk_crypto_derive_sw_secret()`` to access this method.
+
+- The programming and eviction of hardware-wrapped keys happens via
+ ``blk_crypto_ll_ops::keyslot_program`` and
+ ``blk_crypto_ll_ops::keyslot_evict``, just like it does for raw keys. If a
+ driver supports hardware-wrapped keys, then it must handle hardware-wrapped
+ keys being passed to these methods.
+
+blk-crypto-fallback doesn't support hardware-wrapped keys. Therefore,
+hardware-wrapped keys can only be used with actual inline encryption hardware.
+
+All the above deals with hardware-wrapped keys in ephemerally-wrapped form only.
+To get such keys in the first place, new block device ioctls have been added to
+provide a generic interface to creating and preparing such keys:
+
+- ``BLKCRYPTOIMPORTKEY`` converts a raw key to long-term wrapped form. It takes
+ in a pointer to a ``struct blk_crypto_import_key_arg``. The caller must set
+ ``raw_key_ptr`` and ``raw_key_size`` to the pointer and size (in bytes) of the
+ raw key to import. On success, ``BLKCRYPTOIMPORTKEY`` returns 0 and writes
+ the resulting long-term wrapped key blob to the buffer pointed to by
+ ``lt_key_ptr``, which is of maximum size ``lt_key_size``. It also updates
+ ``lt_key_size`` to be the actual size of the key. On failure, it returns -1
+ and sets errno. An errno of ``EOPNOTSUPP`` indicates that the block device
+ does not support hardware-wrapped keys. An errno of ``EOVERFLOW`` indicates
+ that the output buffer did not have enough space for the key blob.
+
+- ``BLKCRYPTOGENERATEKEY`` is like ``BLKCRYPTOIMPORTKEY``, but it has the
+ hardware generate the key instead of importing one. It takes in a pointer to
+ a ``struct blk_crypto_generate_key_arg``.
+
+- ``BLKCRYPTOPREPAREKEY`` converts a key from long-term wrapped form to
+ ephemerally-wrapped form. It takes in a pointer to a ``struct
+ blk_crypto_prepare_key_arg``. The caller must set ``lt_key_ptr`` and
+ ``lt_key_size`` to the pointer and size (in bytes) of the long-term wrapped
+ key blob to convert. On success, ``BLKCRYPTOPREPAREKEY`` returns 0 and writes
+ the resulting ephemerally-wrapped key blob to the buffer pointed to by
+ ``eph_key_ptr``, which is of maximum size ``eph_key_size``. It also updates
+ ``eph_key_size`` to be the actual size of the key. On failure, it returns -1
+ and sets errno. Errno values of ``EOPNOTSUPP`` and ``EOVERFLOW`` mean the
+ same as they do for ``BLKCRYPTOIMPORTKEY``. An errno of ``EBADMSG`` indicates
+ that the long-term wrapped key is invalid.
+
+Userspace needs to use either ``BLKCRYPTOIMPORTKEY`` or ``BLKCRYPTOGENERATEKEY``
+once to create a key, and then ``BLKCRYPTOPREPAREKEY`` each time the key is
+unlocked and added to the kernel. Note that these ioctls have no relevance for
+raw keys; they are only for hardware-wrapped keys.
+
+Testability
+-----------
+
+Both the hardware KDF and the inline encryption itself are well-defined
+algorithms that don't depend on any secrets other than the unwrapped key.
+Therefore, if the unwrapped key is known to software, these algorithms can be
+reproduced in software in order to verify the ciphertext that is written to disk
+by the inline encryption hardware.
+
+However, the unwrapped key will only be known to software for testing if the
+"import" functionality is used. Proper testing is not possible in the
+"generate" case where the hardware generates the key itself. The correct
+operation of the "generate" mode thus relies on the security and correctness of
+the hardware RNG and its use to generate the key, as well as the testing of the
+"import" mode as that should cover all parts other than the key generation.
+
+For an example of a test that verifies the ciphertext written to disk in the
+"import" mode, see the fscrypt hardware-wrapped key tests in xfstests, or
+`Android's vts_kernel_encryption_test
+<https://android.googlesource.com/platform/test/vts-testcase/kernel/+/refs/heads/main/encryption/>`_.
diff --git a/Documentation/block/ublk.rst b/Documentation/block/ublk.rst
index ff74b3ec4a98..8c4030bcabb6 100644
--- a/Documentation/block/ublk.rst
+++ b/Documentation/block/ublk.rst
@@ -115,15 +115,15 @@ managing and controlling ublk devices with help of several control commands:
- ``UBLK_CMD_START_DEV``
- After the server prepares userspace resources (such as creating per-queue
- pthread & io_uring for handling ublk IO), this command is sent to the
+ After the server prepares userspace resources (such as creating I/O handler
+ threads & io_uring for handling ublk IO), this command is sent to the
driver for allocating & exposing ``/dev/ublkb*``. Parameters set via
``UBLK_CMD_SET_PARAMS`` are applied for creating the device.
- ``UBLK_CMD_STOP_DEV``
Halt IO on ``/dev/ublkb*`` and remove the device. When this command returns,
- ublk server will release resources (such as destroying per-queue pthread &
+ ublk server will release resources (such as destroying I/O handler threads &
io_uring).
- ``UBLK_CMD_DEL_DEV``
@@ -199,24 +199,36 @@ managing and controlling ublk devices with help of several control commands:
- user recovery feature description
- Two new features are added for user recovery: ``UBLK_F_USER_RECOVERY`` and
- ``UBLK_F_USER_RECOVERY_REISSUE``.
-
- With ``UBLK_F_USER_RECOVERY`` set, after one ubq_daemon(ublk server's io
- handler) is dying, ublk does not delete ``/dev/ublkb*`` during the whole
+ Three new features are added for user recovery: ``UBLK_F_USER_RECOVERY``,
+ ``UBLK_F_USER_RECOVERY_REISSUE``, and ``UBLK_F_USER_RECOVERY_FAIL_IO``. To
+ enable recovery of ublk devices after the ublk server exits, the ublk server
+ should specify the ``UBLK_F_USER_RECOVERY`` flag when creating the device. The
+ ublk server may additionally specify at most one of
+ ``UBLK_F_USER_RECOVERY_REISSUE`` and ``UBLK_F_USER_RECOVERY_FAIL_IO`` to
+ modify how I/O is handled while the ublk server is dying/dead (this is called
+ the ``nosrv`` case in the driver code).
+
+ With just ``UBLK_F_USER_RECOVERY`` set, after the ublk server exits,
+ ublk does not delete ``/dev/ublkb*`` during the whole
recovery stage and ublk device ID is kept. It is ublk server's
responsibility to recover the device context by its own knowledge.
Requests which have not been issued to userspace are requeued. Requests
which have been issued to userspace are aborted.
- With ``UBLK_F_USER_RECOVERY_REISSUE`` set, after one ubq_daemon(ublk
- server's io handler) is dying, contrary to ``UBLK_F_USER_RECOVERY``,
+ With ``UBLK_F_USER_RECOVERY_REISSUE`` additionally set, after the ublk server
+ exits, contrary to ``UBLK_F_USER_RECOVERY``,
requests which have been issued to userspace are requeued and will be
re-issued to the new process after handling ``UBLK_CMD_END_USER_RECOVERY``.
``UBLK_F_USER_RECOVERY_REISSUE`` is designed for backends who tolerate
double-write since the driver may issue the same I/O request twice. It
might be useful to a read-only FS or a VM backend.
+ With ``UBLK_F_USER_RECOVERY_FAIL_IO`` additionally set, after the ublk server
+ exits, requests which have issued to userspace are failed, as are any
+ subsequently issued requests. Applications continuously issuing I/O against
+ devices with this flag set will see a stream of I/O errors until a new ublk
+ server recovers the device.
+
Unprivileged ublk device is supported by passing ``UBLK_F_UNPRIVILEGED_DEV``.
Once the flag is set, all control commands can be sent by unprivileged
user. Except for command of ``UBLK_CMD_ADD_DEV``, permission check on
@@ -229,10 +241,11 @@ can be controlled/accessed just inside this container.
Data plane
----------
-ublk server needs to create per-queue IO pthread & io_uring for handling IO
-commands via io_uring passthrough. The per-queue IO pthread
-focuses on IO handling and shouldn't handle any control & management
-tasks.
+The ublk server should create dedicated threads for handling I/O. Each
+thread should have its own io_uring through which it is notified of new
+I/O, and through which it can complete I/O. These dedicated threads
+should focus on IO handling and shouldn't handle any control &
+management tasks.
The's IO is assigned by a unique tag, which is 1:1 mapping with IO
request of ``/dev/ublkb*``.
@@ -253,6 +266,18 @@ with specified IO tag in the command data:
destined to ``/dev/ublkb*``. This command is sent only once from the server
IO pthread for ublk driver to setup IO forward environment.
+ Once a thread issues this command against a given (qid,tag) pair, the thread
+ registers itself as that I/O's daemon. In the future, only that I/O's daemon
+ is allowed to issue commands against the I/O. If any other thread attempts
+ to issue a command against a (qid,tag) pair for which the thread is not the
+ daemon, the command will fail. Daemons can be reset only be going through
+ recovery.
+
+ The ability for every (qid,tag) pair to have its own independent daemon task
+ is indicated by the ``UBLK_F_PER_IO_DAEMON`` feature. If this feature is not
+ supported by the driver, daemons must be per-queue instead - i.e. all I/Os
+ associated to a single qid must be handled by the same task.
+
- ``UBLK_IO_COMMIT_AND_FETCH_REQ``
When an IO request is destined to ``/dev/ublkb*``, the driver stores
@@ -297,18 +322,112 @@ with specified IO tag in the command data:
``UBLK_IO_COMMIT_AND_FETCH_REQ`` to the server, ublkdrv needs to copy
the server buffer (pages) read to the IO request pages.
-Future development
-==================
-
Zero copy
---------
-Zero copy is a generic requirement for nbd, fuse or similar drivers. A
-problem [#xiaoguang]_ Xiaoguang mentioned is that pages mapped to userspace
-can't be remapped any more in kernel with existing mm interfaces. This can
-occurs when destining direct IO to ``/dev/ublkb*``. Also, he reported that
-big requests (IO size >= 256 KB) may benefit a lot from zero copy.
+ublk zero copy relies on io_uring's fixed kernel buffer, which provides
+two APIs: `io_buffer_register_bvec()` and `io_buffer_unregister_bvec`.
+
+ublk adds IO command of `UBLK_IO_REGISTER_IO_BUF` to call
+`io_buffer_register_bvec()` for ublk server to register client request
+buffer into io_uring buffer table, then ublk server can submit io_uring
+IOs with the registered buffer index. IO command of `UBLK_IO_UNREGISTER_IO_BUF`
+calls `io_buffer_unregister_bvec()` to unregister the buffer, which is
+guaranteed to be live between calling `io_buffer_register_bvec()` and
+`io_buffer_unregister_bvec()`. Any io_uring operation which supports this
+kind of kernel buffer will grab one reference of the buffer until the
+operation is completed.
+
+ublk server implementing zero copy or user copy has to be CAP_SYS_ADMIN and
+be trusted, because it is ublk server's responsibility to make sure IO buffer
+filled with data for handling read command, and ublk server has to return
+correct result to ublk driver when handling READ command, and the result
+has to match with how many bytes filled to the IO buffer. Otherwise,
+uninitialized kernel IO buffer will be exposed to client application.
+
+ublk server needs to align the parameter of `struct ublk_param_dma_align`
+with backend for zero copy to work correctly.
+
+For reaching best IO performance, ublk server should align its segment
+parameter of `struct ublk_param_segment` with backend for avoiding
+unnecessary IO split, which usually hurts io_uring performance.
+
+Auto Buffer Registration
+------------------------
+
+The ``UBLK_F_AUTO_BUF_REG`` feature automatically handles buffer registration
+and unregistration for I/O requests, which simplifies the buffer management
+process and reduces overhead in the ublk server implementation.
+
+This is another feature flag for using zero copy, and it is compatible with
+``UBLK_F_SUPPORT_ZERO_COPY``.
+
+Feature Overview
+~~~~~~~~~~~~~~~~
+
+This feature automatically registers request buffers to the io_uring context
+before delivering I/O commands to the ublk server and unregisters them when
+completing I/O commands. This eliminates the need for manual buffer
+registration/unregistration via ``UBLK_IO_REGISTER_IO_BUF`` and
+``UBLK_IO_UNREGISTER_IO_BUF`` commands, then IO handling in ublk server
+can avoid dependency on the two uring_cmd operations.
+
+IOs can't be issued concurrently to io_uring if there is any dependency
+among these IOs. So this way not only simplifies ublk server implementation,
+but also makes concurrent IO handling becomes possible by removing the
+dependency on buffer registration & unregistration commands.
+
+Usage Requirements
+~~~~~~~~~~~~~~~~~~
+1. The ublk server must create a sparse buffer table on the same ``io_ring_ctx``
+ used for ``UBLK_IO_FETCH_REQ`` and ``UBLK_IO_COMMIT_AND_FETCH_REQ``. If
+ uring_cmd is issued on a different ``io_ring_ctx``, manual buffer
+ unregistration is required.
+
+2. Buffer registration data must be passed via uring_cmd's ``sqe->addr`` with the
+ following structure::
+
+ struct ublk_auto_buf_reg {
+ __u16 index; /* Buffer index for registration */
+ __u8 flags; /* Registration flags */
+ __u8 reserved0; /* Reserved for future use */
+ __u32 reserved1; /* Reserved for future use */
+ };
+
+ ublk_auto_buf_reg_to_sqe_addr() is for converting the above structure into
+ ``sqe->addr``.
+
+3. All reserved fields in ``ublk_auto_buf_reg`` must be zeroed.
+
+4. Optional flags can be passed via ``ublk_auto_buf_reg.flags``.
+
+Fallback Behavior
+~~~~~~~~~~~~~~~~~
+
+If auto buffer registration fails:
+
+1. When ``UBLK_AUTO_BUF_REG_FALLBACK`` is enabled:
+
+ - The uring_cmd is completed
+ - ``UBLK_IO_F_NEED_REG_BUF`` is set in ``ublksrv_io_desc.op_flags``
+ - The ublk server must manually deal with the failure, such as, register
+ the buffer manually, or using user copy feature for retrieving the data
+ for handling ublk IO
+
+2. If fallback is not enabled:
+
+ - The ublk I/O request fails silently
+ - The uring_cmd won't be completed
+
+Limitations
+~~~~~~~~~~~
+
+- Requires same ``io_ring_ctx`` for all operations
+- May require manual buffer management in fallback cases
+- io_ring_ctx buffer table has a max size of 16K, which may not be enough
+ in case that too many ublk devices are handled by this single io_ring_ctx
+ and each one has very large queue depth
References
==========
@@ -320,7 +439,3 @@ References
.. [#userspace_nbdublk] https://gitlab.com/rwmjones/libnbd/-/tree/nbdublk
.. [#userspace_readme] https://github.com/ming1/ubdsrv/blob/master/README
-
-.. [#stefan] https://lore.kernel.org/linux-block/YoOr6jBfgVm8GvWg@stefanha-x1.localdomain/
-
-.. [#xiaoguang] https://lore.kernel.org/linux-block/YoOr6jBfgVm8GvWg@stefanha-x1.localdomain/
diff --git a/Documentation/block/writeback_cache_control.rst b/Documentation/block/writeback_cache_control.rst
index b208488d0aae..c3707d071780 100644
--- a/Documentation/block/writeback_cache_control.rst
+++ b/Documentation/block/writeback_cache_control.rst
@@ -46,41 +46,50 @@ worry if the underlying devices need any explicit cache flushing and how
the Forced Unit Access is implemented. The REQ_PREFLUSH and REQ_FUA flags
may both be set on a single bio.
+Feature settings for block drivers
+----------------------------------
-Implementation details for bio based block drivers
---------------------------------------------------------------
+For devices that do not support volatile write caches there is no driver
+support required, the block layer completes empty REQ_PREFLUSH requests before
+entering the driver and strips off the REQ_PREFLUSH and REQ_FUA bits from
+requests that have a payload.
-These drivers will always see the REQ_PREFLUSH and REQ_FUA bits as they sit
-directly below the submit_bio interface. For remapping drivers the REQ_FUA
-bits need to be propagated to underlying devices, and a global flush needs
-to be implemented for bios with the REQ_PREFLUSH bit set. For real device
-drivers that do not have a volatile cache the REQ_PREFLUSH and REQ_FUA bits
-on non-empty bios can simply be ignored, and REQ_PREFLUSH requests without
-data can be completed successfully without doing any work. Drivers for
-devices with volatile caches need to implement the support for these
-flags themselves without any help from the block layer.
+For devices with volatile write caches the driver needs to tell the block layer
+that it supports flushing caches by setting the
+ BLK_FEAT_WRITE_CACHE
-Implementation details for request_fn based block drivers
----------------------------------------------------------
+flag in the queue_limits feature field. For devices that also support the FUA
+bit the block layer needs to be told to pass on the REQ_FUA bit by also setting
+the
-For devices that do not support volatile write caches there is no driver
-support required, the block layer completes empty REQ_PREFLUSH requests before
-entering the driver and strips off the REQ_PREFLUSH and REQ_FUA bits from
-requests that have a payload. For devices with volatile write caches the
-driver needs to tell the block layer that it supports flushing caches by
-doing::
+ BLK_FEAT_FUA
+
+flag in the features field of the queue_limits structure.
+
+Implementation details for bio based block drivers
+--------------------------------------------------
+
+For bio based drivers the REQ_PREFLUSH and REQ_FUA bit are simply passed on to
+the driver if the driver sets the BLK_FEAT_WRITE_CACHE flag and the driver
+needs to handle them.
+
+*NOTE*: The REQ_FUA bit also gets passed on when the BLK_FEAT_FUA flags is
+_not_ set. Any bio based driver that sets BLK_FEAT_WRITE_CACHE also needs to
+handle REQ_FUA.
- blk_queue_write_cache(sdkp->disk->queue, true, false);
+For remapping drivers the REQ_FUA bits need to be propagated to underlying
+devices, and a global flush needs to be implemented for bios with the
+REQ_PREFLUSH bit set.
-and handle empty REQ_OP_FLUSH requests in its prep_fn/request_fn. Note that
-REQ_PREFLUSH requests with a payload are automatically turned into a sequence
-of an empty REQ_OP_FLUSH request followed by the actual write by the block
-layer. For devices that also support the FUA bit the block layer needs
-to be told to pass through the REQ_FUA bit using::
+Implementation details for blk-mq drivers
+-----------------------------------------
- blk_queue_write_cache(sdkp->disk->queue, true, true);
+When the BLK_FEAT_WRITE_CACHE flag is set, REQ_OP_WRITE | REQ_PREFLUSH requests
+with a payload are automatically turned into a sequence of a REQ_OP_FLUSH
+request followed by the actual write by the block layer.
-and the driver must handle write requests that have the REQ_FUA bit set
-in prep_fn/request_fn. If the FUA bit is not natively supported the block
-layer turns it into an empty REQ_OP_FLUSH request after the actual write.
+When the BLK_FEAT_FUA flags is set, the REQ_FUA bit is simply passed on for the
+REQ_OP_WRITE request, else a REQ_OP_FLUSH request is sent by the block layer
+after the completion of the write request for bio submissions with the REQ_FUA
+bit set.
diff --git a/Documentation/bpf/bpf_devel_QA.rst b/Documentation/bpf/bpf_devel_QA.rst
index de27e1620821..45bc5c5cd793 100644
--- a/Documentation/bpf/bpf_devel_QA.rst
+++ b/Documentation/bpf/bpf_devel_QA.rst
@@ -382,6 +382,14 @@ In case of new BPF instructions, once the changes have been accepted
into the Linux kernel, please implement support into LLVM's BPF back
end. See LLVM_ section below for further information.
+Q: What "BPF_INTERNAL" symbol namespace is for?
+-----------------------------------------------
+A: Symbols exported as BPF_INTERNAL can only be used by BPF infrastructure
+like preload kernel modules with light skeleton. Most symbols outside
+of BPF_INTERNAL are not expected to be used by code outside of BPF either.
+Symbols may lack the designation because they predate the namespaces,
+or due to an oversight.
+
Stable submission
=================
@@ -603,9 +611,10 @@ Q: I have added a new BPF instruction to the kernel, how can I integrate
it into LLVM?
A: LLVM has a ``-mcpu`` selector for the BPF back end in order to allow
-the selection of BPF instruction set extensions. By default the
-``generic`` processor target is used, which is the base instruction set
-(v1) of BPF.
+the selection of BPF instruction set extensions. Before llvm version 20,
+the ``generic`` processor target is used, which is the base instruction
+set (v1) of BPF. Since llvm 20, the default processor target has changed
+to instruction set v3.
LLVM has an option to select ``-mcpu=probe`` where it will probe the host
kernel for supported BPF instruction set extensions and selects the
diff --git a/Documentation/bpf/bpf_iterators.rst b/Documentation/bpf/bpf_iterators.rst
index 07433915aa41..189e3ec1c6c8 100644
--- a/Documentation/bpf/bpf_iterators.rst
+++ b/Documentation/bpf/bpf_iterators.rst
@@ -2,10 +2,117 @@
BPF Iterators
=============
+--------
+Overview
+--------
+
+BPF supports two separate entities collectively known as "BPF iterators": BPF
+iterator *program type* and *open-coded* BPF iterators. The former is
+a stand-alone BPF program type which, when attached and activated by user,
+will be called once for each entity (task_struct, cgroup, etc) that is being
+iterated. The latter is a set of BPF-side APIs implementing iterator
+functionality and available across multiple BPF program types. Open-coded
+iterators provide similar functionality to BPF iterator programs, but gives
+more flexibility and control to all other BPF program types. BPF iterator
+programs, on the other hand, can be used to implement anonymous or BPF
+FS-mounted special files, whose contents are generated by attached BPF iterator
+program, backed by seq_file functionality. Both are useful depending on
+specific needs.
+
+When adding a new BPF iterator program, it is expected that similar
+functionality will be added as open-coded iterator for maximum flexibility.
+It's also expected that iteration logic and code will be maximally shared and
+reused between two iterator API surfaces.
-----------
-Motivation
-----------
+------------------------
+Open-coded BPF Iterators
+------------------------
+
+Open-coded BPF iterators are implemented as tightly-coupled trios of kfuncs
+(constructor, next element fetch, destructor) and iterator-specific type
+describing on-the-stack iterator state, which is guaranteed by the BPF
+verifier to not be tampered with outside of the corresponding
+constructor/destructor/next APIs.
+
+Each kind of open-coded BPF iterator has its own associated
+struct bpf_iter_<type>, where <type> denotes a specific type of iterator.
+bpf_iter_<type> state needs to live on BPF program stack, so make sure it's
+small enough to fit on BPF stack. For performance reasons its best to avoid
+dynamic memory allocation for iterator state and size the state struct big
+enough to fit everything necessary. But if necessary, dynamic memory
+allocation is a way to bypass BPF stack limitations. Note, state struct size
+is part of iterator's user-visible API, so changing it will break backwards
+compatibility, so be deliberate about designing it.
+
+All kfuncs (constructor, next, destructor) have to be named consistently as
+bpf_iter_<type>_{new,next,destroy}(), respectively. <type> represents iterator
+type, and iterator state should be represented as a matching
+`struct bpf_iter_<type>` state type. Also, all iter kfuncs should have
+a pointer to this `struct bpf_iter_<type>` as the very first argument.
+
+Additionally:
+ - Constructor, i.e., `bpf_iter_<type>_new()`, can have arbitrary extra
+ number of arguments. Return type is not enforced either.
+ - Next method, i.e., `bpf_iter_<type>_next()`, has to return a pointer
+ type and should have exactly one argument: `struct bpf_iter_<type> *`
+ (const/volatile/restrict and typedefs are ignored).
+ - Destructor, i.e., `bpf_iter_<type>_destroy()`, should return void and
+ should have exactly one argument, similar to the next method.
+ - `struct bpf_iter_<type>` size is enforced to be positive and
+ a multiple of 8 bytes (to fit stack slots correctly).
+
+Such strictness and consistency allows to build generic helpers abstracting
+important, but boilerplate, details to be able to use open-coded iterators
+effectively and ergonomically (see libbpf's bpf_for_each() macro). This is
+enforced at kfunc registration point by the kernel.
+
+Constructor/next/destructor implementation contract is as follows:
+ - constructor, `bpf_iter_<type>_new()`, always initializes iterator state on
+ the stack. If any of the input arguments are invalid, constructor should
+ make sure to still initialize it such that subsequent next() calls will
+ return NULL. I.e., on error, *return error and construct empty iterator*.
+ Constructor kfunc is marked with KF_ITER_NEW flag.
+
+ - next method, `bpf_iter_<type>_next()`, accepts pointer to iterator state
+ and produces an element. Next method should always return a pointer. The
+ contract between BPF verifier is that next method *guarantees* that it
+ will eventually return NULL when elements are exhausted. Once NULL is
+ returned, subsequent next calls *should keep returning NULL*. Next method
+ is marked with KF_ITER_NEXT (and should also have KF_RET_NULL as
+ NULL-returning kfunc, of course).
+
+ - destructor, `bpf_iter_<type>_destroy()`, is always called once. Even if
+ constructor failed or next returned nothing. Destructor frees up any
+ resources and marks stack space used by `struct bpf_iter_<type>` as usable
+ for something else. Destructor is marked with KF_ITER_DESTROY flag.
+
+Any open-coded BPF iterator implementation has to implement at least these
+three methods. It is enforced that for any given type of iterator only
+applicable constructor/destructor/next are callable. I.e., verifier ensures
+you can't pass number iterator state into, say, cgroup iterator's next method.
+
+From a 10,000-feet BPF verification point of view, next methods are the points
+of forking a verification state, which are conceptually similar to what
+verifier is doing when validating conditional jumps. Verifier is branching out
+`call bpf_iter_<type>_next` instruction and simulates two outcomes: NULL
+(iteration is done) and non-NULL (new element is returned). NULL is simulated
+first and is supposed to reach exit without looping. After that non-NULL case
+is validated and it either reaches exit (for trivial examples with no real
+loop), or reaches another `call bpf_iter_<type>_next` instruction with the
+state equivalent to already (partially) validated one. State equivalency at
+that point means we technically are going to be looping forever without
+"breaking out" out of established "state envelope" (i.e., subsequent
+iterations don't add any new knowledge or constraints to the verifier state,
+so running 1, 2, 10, or a million of them doesn't matter). But taking into
+account the contract stating that iterator next method *has to* return NULL
+eventually, we can conclude that loop body is safe and will eventually
+terminate. Given we validated logic outside of the loop (NULL case), and
+concluded that loop body is safe (though potentially looping many times),
+verifier can claim safety of the overall program logic.
+
+------------------------
+BPF Iterators Motivation
+------------------------
There are a few existing ways to dump kernel data into user space. The most
popular one is the ``/proc`` system. For example, ``cat /proc/net/tcp6`` dumps
@@ -86,7 +193,7 @@ following steps:
The following are a few examples of selftest BPF iterator programs:
* `bpf_iter_tcp4.c <https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/tree/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c>`_
-* `bpf_iter_task_vma.c <https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/tree/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c>`_
+* `bpf_iter_task_vmas.c <https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/tree/tools/testing/selftests/bpf/progs/bpf_iter_task_vmas.c>`_
* `bpf_iter_task_file.c <https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/tree/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c>`_
Let us look at ``bpf_iter_task_file.c``, which runs in kernel space:
@@ -323,8 +430,8 @@ Now, in the userspace program, pass the pointer of struct to the
::
- link = bpf_program__attach_iter(prog, &opts); iter_fd =
- bpf_iter_create(bpf_link__fd(link));
+ link = bpf_program__attach_iter(prog, &opts);
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
If both *tid* and *pid* are zero, an iterator created from this struct
``bpf_iter_attach_opts`` will include every opened file of every task in the
diff --git a/Documentation/bpf/btf.rst b/Documentation/bpf/btf.rst
index 257a7e1cdf5d..3b60583f5db2 100644
--- a/Documentation/bpf/btf.rst
+++ b/Documentation/bpf/btf.rst
@@ -102,7 +102,8 @@ Each type contains the following common data::
* bits 24-28: kind (e.g. int, ptr, array...etc)
* bits 29-30: unused
* bit 31: kind_flag, currently used by
- * struct, union, fwd, enum and enum64.
+ * struct, union, enum, fwd, enum64,
+ * decl_tag and type_tag
*/
__u32 info;
/* "size" is used by INT, ENUM, STRUCT, UNION and ENUM64.
@@ -368,7 +369,7 @@ No additional type data follow ``btf_type``.
* ``info.kind_flag``: 0
* ``info.kind``: BTF_KIND_FUNC
* ``info.vlen``: linkage information (BTF_FUNC_STATIC, BTF_FUNC_GLOBAL
- or BTF_FUNC_EXTERN)
+ or BTF_FUNC_EXTERN - see :ref:`BTF_Function_Linkage_Constants`)
* ``type``: a BTF_KIND_FUNC_PROTO type
No additional type data follow ``btf_type``.
@@ -424,9 +425,8 @@ following data::
__u32 linkage;
};
-``struct btf_var`` encoding:
- * ``linkage``: currently only static variable 0, or globally allocated
- variable in ELF sections 1
+``btf_var.linkage`` may take the values: BTF_VAR_STATIC, BTF_VAR_GLOBAL_ALLOCATED or BTF_VAR_GLOBAL_EXTERN -
+see :ref:`BTF_Var_Linkage_Constants`.
Not all type of global variables are supported by LLVM at this point.
The following is currently available:
@@ -479,7 +479,7 @@ No additional type data follow ``btf_type``.
``struct btf_type`` encoding requirement:
* ``name_off``: offset to a non-empty string
- * ``info.kind_flag``: 0
+ * ``info.kind_flag``: 0 or 1
* ``info.kind``: BTF_KIND_DECL_TAG
* ``info.vlen``: 0
* ``type``: ``struct``, ``union``, ``func``, ``var`` or ``typedef``
@@ -490,7 +490,6 @@ No additional type data follow ``btf_type``.
__u32 component_idx;
};
-The ``name_off`` encodes btf_decl_tag attribute string.
The ``type`` should be ``struct``, ``union``, ``func``, ``var`` or ``typedef``.
For ``var`` or ``typedef`` type, ``btf_decl_tag.component_idx`` must be ``-1``.
For the other three types, if the btf_decl_tag attribute is
@@ -500,12 +499,21 @@ the attribute is applied to a ``struct``/``union`` member or
a ``func`` argument, and ``btf_decl_tag.component_idx`` should be a
valid index (starting from 0) pointing to a member or an argument.
+If ``info.kind_flag`` is 0, then this is a normal decl tag, and the
+``name_off`` encodes btf_decl_tag attribute string.
+
+If ``info.kind_flag`` is 1, then the decl tag represents an arbitrary
+__attribute__. In this case, ``name_off`` encodes a string
+representing the attribute-list of the attribute specifier. For
+example, for an ``__attribute__((aligned(4)))`` the string's contents
+is ``aligned(4)``.
+
2.2.18 BTF_KIND_TYPE_TAG
~~~~~~~~~~~~~~~~~~~~~~~~
``struct btf_type`` encoding requirement:
* ``name_off``: offset to a non-empty string
- * ``info.kind_flag``: 0
+ * ``info.kind_flag``: 0 or 1
* ``info.kind``: BTF_KIND_TYPE_TAG
* ``info.vlen``: 0
* ``type``: the type with ``btf_type_tag`` attribute
@@ -523,6 +531,14 @@ type_tag, then zero or more const/volatile/restrict/typedef
and finally the base type. The base type is one of
int, ptr, array, struct, union, enum, func_proto and float types.
+Similarly to decl tags, if the ``info.kind_flag`` is 0, then this is a
+normal type tag, and the ``name_off`` encodes btf_type_tag attribute
+string.
+
+If ``info.kind_flag`` is 1, then the type tag represents an arbitrary
+__attribute__, and the ``name_off`` encodes a string representing the
+attribute-list of the attribute specifier.
+
2.2.19 BTF_KIND_ENUM64
~~~~~~~~~~~~~~~~~~~~~~
@@ -549,6 +565,38 @@ The ``btf_enum64`` encoding:
If the original enum value is signed and the size is less than 8,
that value will be sign extended into 8 bytes.
+2.3 Constant Values
+-------------------
+
+.. _BTF_Function_Linkage_Constants:
+
+2.3.1 Function Linkage Constant Values
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. table:: Function Linkage Values and Meanings
+
+ =================== ===== ===========
+ kind value description
+ =================== ===== ===========
+ ``BTF_FUNC_STATIC`` 0x0 definition of subprogram not visible outside containing compilation unit
+ ``BTF_FUNC_GLOBAL`` 0x1 definition of subprogram visible outside containing compilation unit
+ ``BTF_FUNC_EXTERN`` 0x2 declaration of a subprogram whose definition is outside the containing compilation unit
+ =================== ===== ===========
+
+
+.. _BTF_Var_Linkage_Constants:
+
+2.3.2 Variable Linkage Constant Values
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. table:: Variable Linkage Values and Meanings
+
+ ============================ ===== ===========
+ kind value description
+ ============================ ===== ===========
+ ``BTF_VAR_STATIC`` 0x0 definition of global variable not visible outside containing compilation unit
+ ``BTF_VAR_GLOBAL_ALLOCATED`` 0x1 definition of global variable visible outside containing compilation unit
+ ``BTF_VAR_GLOBAL_EXTERN`` 0x2 declaration of global variable whose definition is outside the containing compilation unit
+ ============================ ===== ===========
+
3. BTF Kernel API
=================
@@ -804,7 +852,7 @@ section named by ``btf_ext_info_sec->sec_name_off``.
See :ref:`Documentation/bpf/llvm_reloc.rst <btf-co-re-relocations>`
for more information on CO-RE relocations.
-4.2 .BTF_ids section
+4.3 .BTF_ids section
--------------------
The .BTF_ids section encodes BTF ID values that are used within the kernel.
@@ -865,6 +913,81 @@ and is used as a filter when resolving the BTF ID value.
All the BTF ID lists and sets are compiled in the .BTF_ids section and
resolved during the linking phase of kernel build by ``resolve_btfids`` tool.
+4.4 .BTF.base section
+---------------------
+Split BTF - where the .BTF section only contains types not in the associated
+base .BTF section - is an extremely efficient way to encode type information
+for kernel modules, since they generally consist of a few module-specific
+types along with a large set of shared kernel types. The former are encoded
+in split BTF, while the latter are encoded in base BTF, resulting in more
+compact representations. A type in split BTF that refers to a type in
+base BTF refers to it using its base BTF ID, and split BTF IDs start
+at last_base_BTF_ID + 1.
+
+The downside of this approach however is that this makes the split BTF
+somewhat brittle - when the base BTF changes, base BTF ID references are
+no longer valid and the split BTF itself becomes useless. The role of the
+.BTF.base section is to make split BTF more resilient for cases where
+the base BTF may change, as is the case for kernel modules not built every
+time the kernel is for example. .BTF.base contains named base types; INTs,
+FLOATs, STRUCTs, UNIONs, ENUM[64]s and FWDs. INTs and FLOATs are fully
+described in .BTF.base sections, while composite types like structs
+and unions are not fully defined - the .BTF.base type simply serves as
+a description of the type the split BTF referred to, so structs/unions
+have 0 members in the .BTF.base section. ENUM[64]s are similarly recorded
+with 0 members. Any other types are added to the split BTF. This
+distillation process then leaves us with a .BTF.base section with
+such minimal descriptions of base types and .BTF split section which refers
+to those base types. Later, we can relocate the split BTF using both the
+information stored in the .BTF.base section and the new .BTF base; the type
+information in the .BTF.base section allows us to update the split BTF
+references to point at the corresponding new base BTF IDs.
+
+BTF relocation happens on kernel module load when a kernel module has a
+.BTF.base section, and libbpf also provides a btf__relocate() API to
+accomplish this.
+
+As an example consider the following base BTF::
+
+ [1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED
+ [2] STRUCT 'foo' size=8 vlen=2
+ 'f1' type_id=1 bits_offset=0
+ 'f2' type_id=1 bits_offset=32
+
+...and associated split BTF::
+
+ [3] PTR '(anon)' type_id=2
+
+i.e. split BTF describes a pointer to struct foo { int f1; int f2 };
+
+.BTF.base will consist of::
+
+ [1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED
+ [2] STRUCT 'foo' size=8 vlen=0
+
+If we relocate the split BTF later using the following new base BTF::
+
+ [1] INT 'long unsigned int' size=8 bits_offset=0 nr_bits=64 encoding=(none)
+ [2] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED
+ [3] STRUCT 'foo' size=8 vlen=2
+ 'f1' type_id=2 bits_offset=0
+ 'f2' type_id=2 bits_offset=32
+
+...we can use our .BTF.base description to know that the split BTF reference
+is to struct foo, and relocation results in new split BTF::
+
+ [4] PTR '(anon)' type_id=3
+
+Note that we had to update BTF ID and start BTF ID for the split BTF.
+
+So we see how .BTF.base plays the role of facilitating later relocation,
+leading to more resilient split BTF.
+
+.BTF.base sections will be generated automatically for out-of-tree kernel module
+builds - i.e. where KBUILD_EXTMOD is set (as it would be for "make M=path/2/mod"
+cases). .BTF.base generation requires pahole support for the "distilled_base"
+BTF feature; this is available in pahole v1.28 and later.
+
5. Using BTF
============
diff --git a/Documentation/bpf/kfuncs.rst b/Documentation/bpf/kfuncs.rst
index 7985c6615f3c..e38941370b90 100644
--- a/Documentation/bpf/kfuncs.rst
+++ b/Documentation/bpf/kfuncs.rst
@@ -160,6 +160,23 @@ Or::
...
}
+2.2.6 __prog Annotation
+---------------------------
+This annotation is used to indicate that the argument needs to be fixed up to
+the bpf_prog_aux of the caller BPF program. Any value passed into this argument
+is ignored, and rewritten by the verifier.
+
+An example is given below::
+
+ __bpf_kfunc int bpf_wq_set_callback_impl(struct bpf_wq *wq,
+ int (callback_fn)(void *map, int *key, void *value),
+ unsigned int flags,
+ void *aux__prog)
+ {
+ struct bpf_prog_aux *aux = aux__prog;
+ ...
+ }
+
.. _BPF_kfunc_nodef:
2.3 Using an existing kernel function
@@ -177,10 +194,10 @@ In addition to kfuncs' arguments, verifier may need more information about the
type of kfunc(s) being registered with the BPF subsystem. To do so, we define
flags on a set of kfuncs as follows::
- BTF_SET8_START(bpf_task_set)
+ BTF_KFUNCS_START(bpf_task_set)
BTF_ID_FLAGS(func, bpf_get_task_pid, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_put_pid, KF_RELEASE)
- BTF_SET8_END(bpf_task_set)
+ BTF_KFUNCS_END(bpf_task_set)
This set encodes the BTF ID of each kfunc listed above, and encodes the flags
along with it. Ofcourse, it is also allowed to specify no flags.
@@ -318,9 +335,26 @@ consider doing refcnt != 0 check, especially when returning a KF_ACQUIRE
pointer. Note as well that a KF_ACQUIRE kfunc that is KF_RCU should very likely
also be KF_RET_NULL.
+2.4.8 KF_RCU_PROTECTED flag
+---------------------------
+
+The KF_RCU_PROTECTED flag is used to indicate that the kfunc must be invoked in
+an RCU critical section. This is assumed by default in non-sleepable programs,
+and must be explicitly ensured by calling ``bpf_rcu_read_lock`` for sleepable
+ones.
+
+If the kfunc returns a pointer value, this flag also enforces that the returned
+pointer is RCU protected, and can only be used while the RCU critical section is
+active.
+
+The flag is distinct from the ``KF_RCU`` flag, which only ensures that its
+arguments are at least RCU protected pointers. This may transitively imply that
+RCU protection is ensured, but it does not work in cases of kfuncs which require
+RCU protection but do not take RCU protected arguments.
+
.. _KF_deprecated_flag:
-2.4.8 KF_DEPRECATED flag
+2.4.9 KF_DEPRECATED flag
------------------------
The KF_DEPRECATED flag is used for kfuncs which are scheduled to be
@@ -347,10 +381,10 @@ Once the kfunc is prepared for use, the final step to making it visible is
registering it with the BPF subsystem. Registration is done per BPF program
type. An example is shown below::
- BTF_SET8_START(bpf_task_set)
+ BTF_KFUNCS_START(bpf_task_set)
BTF_ID_FLAGS(func, bpf_get_task_pid, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_put_pid, KF_RELEASE)
- BTF_SET8_END(bpf_task_set)
+ BTF_KFUNCS_END(bpf_task_set)
static const struct btf_kfunc_id_set bpf_task_kfunc_set = {
.owner = THIS_MODULE,
diff --git a/Documentation/bpf/libbpf/libbpf_overview.rst b/Documentation/bpf/libbpf/libbpf_overview.rst
index f36a2d4ffea2..f4d22f0c62b0 100644
--- a/Documentation/bpf/libbpf/libbpf_overview.rst
+++ b/Documentation/bpf/libbpf/libbpf_overview.rst
@@ -219,6 +219,14 @@ compilation and skeleton generation. Using Libbpf-rs will make building user
space part of the BPF application easier. Note that the BPF program themselves
must still be written in plain C.
+libbpf logging
+==============
+
+By default, libbpf logs informational and warning messages to stderr. The
+verbosity of these messages can be controlled by setting the environment
+variable LIBBPF_LOG_LEVEL to either warn, info, or debug. A custom log
+callback can be set using ``libbpf_set_print()``.
+
Additional Documentation
========================
diff --git a/Documentation/bpf/libbpf/program_types.rst b/Documentation/bpf/libbpf/program_types.rst
index 63bb88846e50..3b837522834b 100644
--- a/Documentation/bpf/libbpf/program_types.rst
+++ b/Documentation/bpf/libbpf/program_types.rst
@@ -100,10 +100,26 @@ described in more detail in the footnotes.
| | | ``uretprobe.s+`` [#uprobe]_ | Yes |
+ + +----------------------------------+-----------+
| | | ``usdt+`` [#usdt]_ | |
++ + +----------------------------------+-----------+
+| | | ``usdt.s+`` [#usdt]_ | Yes |
+ +----------------------------------------+----------------------------------+-----------+
| | ``BPF_TRACE_KPROBE_MULTI`` | ``kprobe.multi+`` [#kpmulti]_ | |
+ + +----------------------------------+-----------+
| | | ``kretprobe.multi+`` [#kpmulti]_ | |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_TRACE_KPROBE_SESSION`` | ``kprobe.session+`` [#kpmulti]_ | |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_TRACE_UPROBE_MULTI`` | ``uprobe.multi+`` [#upmul]_ | |
++ + +----------------------------------+-----------+
+| | | ``uprobe.multi.s+`` [#upmul]_ | Yes |
++ + +----------------------------------+-----------+
+| | | ``uretprobe.multi+`` [#upmul]_ | |
++ + +----------------------------------+-----------+
+| | | ``uretprobe.multi.s+`` [#upmul]_ | Yes |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_TRACE_UPROBE_SESSION`` | ``uprobe.session+`` [#upmul]_ | |
++ + +----------------------------------+-----------+
+| | | ``uprobe.session.s+`` [#upmul]_ | Yes |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_LIRC_MODE2`` | ``BPF_LIRC_MODE2`` | ``lirc_mode2`` | |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
@@ -121,6 +137,8 @@ described in more detail in the footnotes.
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_LWT_XMIT`` | | ``lwt_xmit`` | |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
+| ``BPF_PROG_TYPE_NETFILTER`` | | ``netfilter`` | |
++-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_PERF_EVENT`` | | ``perf_event`` | |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE`` | | ``raw_tp.w+`` [#rawtp]_ | |
@@ -131,11 +149,23 @@ described in more detail in the footnotes.
+ + +----------------------------------+-----------+
| | | ``raw_tracepoint+`` | |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
-| ``BPF_PROG_TYPE_SCHED_ACT`` | | ``action`` | |
+| ``BPF_PROG_TYPE_SCHED_ACT`` | | ``action`` [#tc_legacy]_ | |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
-| ``BPF_PROG_TYPE_SCHED_CLS`` | | ``classifier`` | |
+| ``BPF_PROG_TYPE_SCHED_CLS`` | | ``classifier`` [#tc_legacy]_ | |
+ + +----------------------------------+-----------+
-| | | ``tc`` | |
+| | | ``tc`` [#tc_legacy]_ | |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_NETKIT_PRIMARY`` | ``netkit/primary`` | |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_NETKIT_PEER`` | ``netkit/peer`` | |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_TCX_INGRESS`` | ``tc/ingress`` | |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_TCX_EGRESS`` | ``tc/egress`` | |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_TCX_INGRESS`` | ``tcx/ingress`` | |
++ +----------------------------------------+----------------------------------+-----------+
+| | ``BPF_TCX_EGRESS`` | ``tcx/egress`` | |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_SK_LOOKUP`` | ``BPF_SK_LOOKUP`` | ``sk_lookup`` | |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
@@ -155,7 +185,9 @@ described in more detail in the footnotes.
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_SOCK_OPS`` | ``BPF_CGROUP_SOCK_OPS`` | ``sockops`` | |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
-| ``BPF_PROG_TYPE_STRUCT_OPS`` | | ``struct_ops+`` | |
+| ``BPF_PROG_TYPE_STRUCT_OPS`` | | ``struct_ops+`` [#struct_ops]_ | |
++ + +----------------------------------+-----------+
+| | | ``struct_ops.s+`` [#struct_ops]_ | Yes |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_SYSCALL`` | | ``syscall`` | Yes |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
@@ -203,11 +235,19 @@ described in more detail in the footnotes.
non-negative integer.
.. [#ksyscall] The ``ksyscall`` attach format is ``ksyscall/<syscall>``.
.. [#uprobe] The ``uprobe`` attach format is ``uprobe[.s]/<path>:<function>[+<offset>]``.
+.. [#upmul] The ``uprobe.multi`` attach format is ``uprobe.multi[.s]/<path>:<function-pattern>``
+ where ``function-pattern`` supports ``*`` and ``?`` wildcards.
.. [#usdt] The ``usdt`` attach format is ``usdt/<path>:<provider>:<name>``.
.. [#kpmulti] The ``kprobe.multi`` attach format is ``kprobe.multi/<pattern>`` where ``pattern``
supports ``*`` and ``?`` wildcards. Valid characters for pattern are
``a-zA-Z0-9_.*?``.
.. [#lsm] The ``lsm`` attachment format is ``lsm[.s]/<hook>``.
.. [#rawtp] The ``raw_tp`` attach format is ``raw_tracepoint[.w]/<tracepoint>``.
+.. [#tc_legacy] The ``tc``, ``classifier`` and ``action`` attach types are deprecated, use
+ ``tcx/*`` instead.
+.. [#struct_ops] The ``struct_ops`` attach format supports ``struct_ops[.s]/<name>`` convention,
+ but ``name`` is ignored and it is recommended to just use plain
+ ``SEC("struct_ops[.s]")``. The attachments are defined in a struct initializer
+ that is tagged with ``SEC(".struct_ops[.link]")``.
.. [#tp] The ``tracepoint`` attach format is ``tracepoint/<category>/<name>``.
.. [#iter] The ``iter`` attach format is ``iter[.s]/<struct-name>``.
diff --git a/Documentation/bpf/map_array.rst b/Documentation/bpf/map_array.rst
index f2f51a53e8ae..fa56ff75190c 100644
--- a/Documentation/bpf/map_array.rst
+++ b/Documentation/bpf/map_array.rst
@@ -15,8 +15,9 @@ of constant size. The size of the array is defined in ``max_entries`` at
creation time. All array elements are pre-allocated and zero initialized when
created. ``BPF_MAP_TYPE_PERCPU_ARRAY`` uses a different memory region for each
CPU whereas ``BPF_MAP_TYPE_ARRAY`` uses the same memory region. The value
-stored can be of any size, however, all array elements are aligned to 8
-bytes.
+stored can be of any size for ``BPF_MAP_TYPE_ARRAY`` and not more than
+``PCPU_MIN_UNIT_SIZE`` (32 kB) for ``BPF_MAP_TYPE_PERCPU_ARRAY``. All
+array elements are aligned to 8 bytes.
Since kernel 5.5, memory mapping may be enabled for ``BPF_MAP_TYPE_ARRAY`` by
setting the flag ``BPF_F_MMAPABLE``. The map definition is page-aligned and
diff --git a/Documentation/bpf/map_hash.rst b/Documentation/bpf/map_hash.rst
index d2343952f2cb..8606bf958a8c 100644
--- a/Documentation/bpf/map_hash.rst
+++ b/Documentation/bpf/map_hash.rst
@@ -233,10 +233,16 @@ attempts in order to enforce the LRU property which have increasing impacts on
other CPUs involved in the following operation attempts:
- Attempt to use CPU-local state to batch operations
-- Attempt to fetch free nodes from global lists
+- Attempt to fetch ``target_free`` free nodes from global lists
- Attempt to pull any node from a global list and remove it from the hashmap
- Attempt to pull any node from any CPU's list and remove it from the hashmap
+The number of nodes to borrow from the global list in a batch, ``target_free``,
+depends on the size of the map. Larger batch size reduces lock contention, but
+may also exhaust the global structure. The value is computed at map init to
+avoid exhaustion, by limiting aggregate reservation by all CPUs to half the map
+size. With a minimum of a single element and maximum budget of 128 at a time.
+
This algorithm is described visually in the following diagram. See the
description in commit 3a08c2fd7634 ("bpf: LRU List") for a full explanation of
the corresponding operations:
diff --git a/Documentation/bpf/map_lpm_trie.rst b/Documentation/bpf/map_lpm_trie.rst
index 74d64a30f500..f9cd579496c9 100644
--- a/Documentation/bpf/map_lpm_trie.rst
+++ b/Documentation/bpf/map_lpm_trie.rst
@@ -17,7 +17,7 @@ significant byte.
LPM tries may be created with a maximum prefix length that is a multiple
of 8, in the range from 8 to 2048. The key used for lookup and update
-operations is a ``struct bpf_lpm_trie_key``, extended by
+operations is a ``struct bpf_lpm_trie_key_u8``, extended by
``max_prefixlen/8`` bytes.
- For IPv4 addresses the data length is 4 bytes
diff --git a/Documentation/bpf/map_lru_hash_update.dot b/Documentation/bpf/map_lru_hash_update.dot
index a0fee349d29c..ab10058f5b79 100644
--- a/Documentation/bpf/map_lru_hash_update.dot
+++ b/Documentation/bpf/map_lru_hash_update.dot
@@ -35,18 +35,18 @@ digraph {
fn_bpf_lru_list_pop_free_to_local [shape=rectangle,fillcolor=2,
label="Flush local pending,
Rotate Global list, move
- LOCAL_FREE_TARGET
+ target_free
from global -> local"]
// Also corresponds to:
// fn__local_list_flush()
// fn_bpf_lru_list_rotate()
fn___bpf_lru_node_move_to_free[shape=diamond,fillcolor=2,
- label="Able to free\nLOCAL_FREE_TARGET\nnodes?"]
+ label="Able to free\ntarget_free\nnodes?"]
fn___bpf_lru_list_shrink_inactive [shape=rectangle,fillcolor=3,
label="Shrink inactive list
up to remaining
- LOCAL_FREE_TARGET
+ target_free
(global LRU -> local)"]
fn___bpf_lru_list_shrink [shape=diamond,fillcolor=2,
label="> 0 entries in\nlocal free list?"]
diff --git a/Documentation/bpf/standardization/abi.rst b/Documentation/bpf/standardization/abi.rst
index 0c2e10eeb89a..41514137cb7b 100644
--- a/Documentation/bpf/standardization/abi.rst
+++ b/Documentation/bpf/standardization/abi.rst
@@ -23,3 +23,6 @@ The BPF calling convention is defined as:
R0 - R5 are scratch registers and BPF programs needs to spill/fill them if
necessary across calls.
+
+The BPF program needs to store the return value into register R0 before doing an
+``EXIT``.
diff --git a/Documentation/bpf/standardization/instruction-set.rst b/Documentation/bpf/standardization/instruction-set.rst
index 245b6defc298..39c74611752b 100644
--- a/Documentation/bpf/standardization/instruction-set.rst
+++ b/Documentation/bpf/standardization/instruction-set.rst
@@ -1,15 +1,33 @@
.. contents::
.. sectnum::
-=======================================
-BPF Instruction Set Specification, v1.0
-=======================================
-
-This document specifies version 1.0 of the BPF instruction set.
+======================================
+BPF Instruction Set Architecture (ISA)
+======================================
+
+eBPF, also commonly
+referred to as BPF, is a technology with origins in the Linux kernel
+that can run untrusted programs in a privileged context such as an
+operating system kernel. This document specifies the BPF instruction
+set architecture (ISA).
+
+As a historical note, BPF originally stood for Berkeley Packet Filter,
+but now that it can do so much more than packet filtering, the acronym
+no longer makes sense. BPF is now considered a standalone term that
+does not stand for anything. The original BPF is sometimes referred to
+as cBPF (classic BPF) to distinguish it from the now widely deployed
+eBPF (extended BPF).
Documentation conventions
=========================
+The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT",
+"SHOULD", "SHOULD NOT", "RECOMMENDED", "NOT RECOMMENDED", "MAY", and
+"OPTIONAL" in this document are to be interpreted as described in
+BCP 14 `<https://www.rfc-editor.org/info/rfc2119>`_
+`<https://www.rfc-editor.org/info/rfc8174>`_
+when, and only when, they appear in all capitals, as shown here.
+
For brevity and consistency, this document refers to families
of types using a shorthand syntax and refers to several expository,
mnemonic functions when describing the semantics of instructions.
@@ -21,61 +39,60 @@ Types
This document refers to integer types with the notation `SN` to specify
a type's signedness (`S`) and bit width (`N`), respectively.
-.. table:: Meaning of signedness notation.
+.. table:: Meaning of signedness notation
==== =========
- `S` Meaning
+ S Meaning
==== =========
- `u` unsigned
- `s` signed
+ u unsigned
+ s signed
==== =========
-.. table:: Meaning of bit-width notation.
+.. table:: Meaning of bit-width notation
===== =========
- `N` Bit width
+ N Bit width
===== =========
- `8` 8 bits
- `16` 16 bits
- `32` 32 bits
- `64` 64 bits
- `128` 128 bits
+ 8 8 bits
+ 16 16 bits
+ 32 32 bits
+ 64 64 bits
+ 128 128 bits
===== =========
For example, `u32` is a type whose valid values are all the 32-bit unsigned
-numbers and `s16` is a types whose valid values are all the 16-bit signed
+numbers and `s16` is a type whose valid values are all the 16-bit signed
numbers.
Functions
---------
-* `htobe16`: Takes an unsigned 16-bit number in host-endian format and
- returns the equivalent number as an unsigned 16-bit number in big-endian
- format.
-* `htobe32`: Takes an unsigned 32-bit number in host-endian format and
- returns the equivalent number as an unsigned 32-bit number in big-endian
- format.
-* `htobe64`: Takes an unsigned 64-bit number in host-endian format and
- returns the equivalent number as an unsigned 64-bit number in big-endian
- format.
-* `htole16`: Takes an unsigned 16-bit number in host-endian format and
- returns the equivalent number as an unsigned 16-bit number in little-endian
- format.
-* `htole32`: Takes an unsigned 32-bit number in host-endian format and
- returns the equivalent number as an unsigned 32-bit number in little-endian
- format.
-* `htole64`: Takes an unsigned 64-bit number in host-endian format and
- returns the equivalent number as an unsigned 64-bit number in little-endian
- format.
-* `bswap16`: Takes an unsigned 16-bit number in either big- or little-endian
+
+The following byteswap functions are direction-agnostic. That is,
+the same function is used for conversion in either direction discussed
+below.
+
+* be16: Takes an unsigned 16-bit number and converts it between
+ host byte order and big-endian
+ (`IEN137 <https://www.rfc-editor.org/ien/ien137.txt>`_) byte order.
+* be32: Takes an unsigned 32-bit number and converts it between
+ host byte order and big-endian byte order.
+* be64: Takes an unsigned 64-bit number and converts it between
+ host byte order and big-endian byte order.
+* bswap16: Takes an unsigned 16-bit number in either big- or little-endian
format and returns the equivalent number with the same bit width but
opposite endianness.
-* `bswap32`: Takes an unsigned 32-bit number in either big- or little-endian
+* bswap32: Takes an unsigned 32-bit number in either big- or little-endian
format and returns the equivalent number with the same bit width but
opposite endianness.
-* `bswap64`: Takes an unsigned 64-bit number in either big- or little-endian
+* bswap64: Takes an unsigned 64-bit number in either big- or little-endian
format and returns the equivalent number with the same bit width but
opposite endianness.
-
+* le16: Takes an unsigned 16-bit number and converts it between
+ host byte order and little-endian byte order.
+* le32: Takes an unsigned 32-bit number and converts it between
+ host byte order and little-endian byte order.
+* le64: Takes an unsigned 64-bit number and converts it between
+ host byte order and little-endian byte order.
Definitions
-----------
@@ -97,40 +114,104 @@ Definitions
A: 10000110
B: 11111111 10000110
+Conformance groups
+------------------
+
+An implementation does not need to support all instructions specified in this
+document (e.g., deprecated instructions). Instead, a number of conformance
+groups are specified. An implementation MUST support the base32 conformance
+group and MAY support additional conformance groups, where supporting a
+conformance group means it MUST support all instructions in that conformance
+group.
+
+The use of named conformance groups enables interoperability between a runtime
+that executes instructions, and tools such as compilers that generate
+instructions for the runtime. Thus, capability discovery in terms of
+conformance groups might be done manually by users or automatically by tools.
+
+Each conformance group has a short ASCII label (e.g., "base32") that
+corresponds to a set of instructions that are mandatory. That is, each
+instruction has one or more conformance groups of which it is a member.
+
+This document defines the following conformance groups:
+
+* base32: includes all instructions defined in this
+ specification unless otherwise noted.
+* base64: includes base32, plus instructions explicitly noted
+ as being in the base64 conformance group.
+* atomic32: includes 32-bit atomic operation instructions (see `Atomic operations`_).
+* atomic64: includes atomic32, plus 64-bit atomic operation instructions.
+* divmul32: includes 32-bit division, multiplication, and modulo instructions.
+* divmul64: includes divmul32, plus 64-bit division, multiplication,
+ and modulo instructions.
+* packet: deprecated packet access instructions.
+
Instruction encoding
====================
BPF has two instruction encodings:
* the basic instruction encoding, which uses 64 bits to encode an instruction
-* the wide instruction encoding, which appends a second 64-bit immediate (i.e.,
- constant) value after the basic instruction for a total of 128 bits.
+* the wide instruction encoding, which appends a second 64 bits
+ after the basic instruction for a total of 128 bits.
-The fields conforming an encoded basic instruction are stored in the
-following order::
+Basic instruction encoding
+--------------------------
- opcode:8 src_reg:4 dst_reg:4 offset:16 imm:32 // In little-endian BPF.
- opcode:8 dst_reg:4 src_reg:4 offset:16 imm:32 // In big-endian BPF.
+A basic instruction is encoded as follows::
-**imm**
- signed integer immediate value
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | opcode | regs | offset |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | imm |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-**offset**
- signed integer offset used with pointer arithmetic
+**opcode**
+ operation to perform, encoded as follows::
-**src_reg**
- the source register number (0-10), except where otherwise specified
- (`64-bit immediate instructions`_ reuse this field for other purposes)
+ +-+-+-+-+-+-+-+-+
+ |specific |class|
+ +-+-+-+-+-+-+-+-+
-**dst_reg**
- destination register number (0-10)
+ **specific**
+ The format of these bits varies by instruction class
-**opcode**
- operation to perform
+ **class**
+ The instruction class (see `Instruction classes`_)
+
+**regs**
+ The source and destination register numbers, encoded as follows
+ on a little-endian host::
+
+ +-+-+-+-+-+-+-+-+
+ |src_reg|dst_reg|
+ +-+-+-+-+-+-+-+-+
+
+ and as follows on a big-endian host::
+
+ +-+-+-+-+-+-+-+-+
+ |dst_reg|src_reg|
+ +-+-+-+-+-+-+-+-+
+
+ **src_reg**
+ the source register number (0-10), except where otherwise specified
+ (`64-bit immediate instructions`_ reuse this field for other purposes)
-Note that the contents of multi-byte fields ('imm' and 'offset') are
-stored using big-endian byte ordering in big-endian BPF and
-little-endian byte ordering in little-endian BPF.
+ **dst_reg**
+ destination register number (0-10), unless otherwise specified
+ (future instructions might reuse this field for other purposes)
+
+**offset**
+ signed integer offset used with pointer arithmetic, except where
+ otherwise specified (some arithmetic instructions reuse this field
+ for other purposes)
+
+**imm**
+ signed integer immediate value
+
+Note that the contents of multi-byte fields ('offset' and 'imm') are
+stored using big-endian byte ordering on big-endian hosts and
+little-endian byte ordering on little-endian hosts.
For example::
@@ -141,74 +222,92 @@ For example::
07 1 0 00 00 11 22 33 44 r1 += 0x11223344 // big
Note that most instructions do not use all of the fields.
-Unused fields shall be cleared to zero.
+Unused fields SHALL be cleared to zero.
+
+Wide instruction encoding
+--------------------------
-As discussed below in `64-bit immediate instructions`_, a 64-bit immediate
-instruction uses a 64-bit immediate value that is constructed as follows.
-The 64 bits following the basic instruction contain a pseudo instruction
-using the same format but with opcode, dst_reg, src_reg, and offset all set to zero,
-and imm containing the high 32 bits of the immediate value.
+Some instructions are defined to use the wide instruction encoding,
+which uses two 32-bit immediate values. The 64 bits following
+the basic instruction format contain a pseudo instruction
+with 'opcode', 'dst_reg', 'src_reg', and 'offset' all set to zero.
This is depicted in the following figure::
- basic_instruction
- .-----------------------------.
- | |
- code:8 regs:8 offset:16 imm:32 unused:32 imm:32
- | |
- '--------------'
- pseudo instruction
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | opcode | regs | offset |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | imm |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | reserved |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | next_imm |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-Thus the 64-bit immediate value is constructed as follows:
+**opcode**
+ operation to perform, encoded as explained above
+
+**regs**
+ The source and destination register numbers (unless otherwise
+ specified), encoded as explained above
+
+**offset**
+ signed integer offset used with pointer arithmetic, unless
+ otherwise specified
+
+**imm**
+ signed integer immediate value
- imm64 = (next_imm << 32) | imm
+**reserved**
+ unused, set to zero
-where 'next_imm' refers to the imm value of the pseudo instruction
-following the basic instruction. The unused bytes in the pseudo
-instruction are reserved and shall be cleared to zero.
+**next_imm**
+ second signed integer immediate value
Instruction classes
-------------------
-The three LSB bits of the 'opcode' field store the instruction class:
-
-========= ===== =============================== ===================================
-class value description reference
-========= ===== =============================== ===================================
-BPF_LD 0x00 non-standard load operations `Load and store instructions`_
-BPF_LDX 0x01 load into register operations `Load and store instructions`_
-BPF_ST 0x02 store from immediate operations `Load and store instructions`_
-BPF_STX 0x03 store from register operations `Load and store instructions`_
-BPF_ALU 0x04 32-bit arithmetic operations `Arithmetic and jump instructions`_
-BPF_JMP 0x05 64-bit jump operations `Arithmetic and jump instructions`_
-BPF_JMP32 0x06 32-bit jump operations `Arithmetic and jump instructions`_
-BPF_ALU64 0x07 64-bit arithmetic operations `Arithmetic and jump instructions`_
-========= ===== =============================== ===================================
+The three least significant bits of the 'opcode' field store the instruction class:
+
+.. table:: Instruction class
+
+ ===== ===== =============================== ===================================
+ class value description reference
+ ===== ===== =============================== ===================================
+ LD 0x0 non-standard load operations `Load and store instructions`_
+ LDX 0x1 load into register operations `Load and store instructions`_
+ ST 0x2 store from immediate operations `Load and store instructions`_
+ STX 0x3 store from register operations `Load and store instructions`_
+ ALU 0x4 32-bit arithmetic operations `Arithmetic and jump instructions`_
+ JMP 0x5 64-bit jump operations `Arithmetic and jump instructions`_
+ JMP32 0x6 32-bit jump operations `Arithmetic and jump instructions`_
+ ALU64 0x7 64-bit arithmetic operations `Arithmetic and jump instructions`_
+ ===== ===== =============================== ===================================
Arithmetic and jump instructions
================================
-For arithmetic and jump instructions (``BPF_ALU``, ``BPF_ALU64``, ``BPF_JMP`` and
-``BPF_JMP32``), the 8-bit 'opcode' field is divided into three parts:
+For arithmetic and jump instructions (``ALU``, ``ALU64``, ``JMP`` and
+``JMP32``), the 8-bit 'opcode' field is divided into three parts::
-============== ====== =================
-4 bits (MSB) 1 bit 3 bits (LSB)
-============== ====== =================
-code source instruction class
-============== ====== =================
+ +-+-+-+-+-+-+-+-+
+ | code |s|class|
+ +-+-+-+-+-+-+-+-+
**code**
the operation code, whose meaning varies by instruction class
-**source**
+**s (source)**
the source operand location, which unless otherwise specified is one of:
- ====== ===== ==============================================
- source value description
- ====== ===== ==============================================
- BPF_K 0x00 use 32-bit 'imm' value as source operand
- BPF_X 0x08 use 'src_reg' register value as source operand
- ====== ===== ==============================================
+ .. table:: Source operand location
+
+ ====== ===== ==============================================
+ source value description
+ ====== ===== ==============================================
+ K 0 use 32-bit 'imm' value as source operand
+ X 1 use 'src_reg' register value as source operand
+ ====== ===== ==============================================
**instruction class**
the instruction class (see `Instruction classes`_)
@@ -216,86 +315,119 @@ code source instruction class
Arithmetic instructions
-----------------------
-``BPF_ALU`` uses 32-bit wide operands while ``BPF_ALU64`` uses 64-bit wide operands for
-otherwise identical operations.
-The 'code' field encodes the operation as below, where 'src' and 'dst' refer
-to the values of the source and destination registers, respectively.
-
-========= ===== ======= ==========================================================
-code value offset description
-========= ===== ======= ==========================================================
-BPF_ADD 0x00 0 dst += src
-BPF_SUB 0x10 0 dst -= src
-BPF_MUL 0x20 0 dst \*= src
-BPF_DIV 0x30 0 dst = (src != 0) ? (dst / src) : 0
-BPF_SDIV 0x30 1 dst = (src != 0) ? (dst s/ src) : 0
-BPF_OR 0x40 0 dst \|= src
-BPF_AND 0x50 0 dst &= src
-BPF_LSH 0x60 0 dst <<= (src & mask)
-BPF_RSH 0x70 0 dst >>= (src & mask)
-BPF_NEG 0x80 0 dst = -dst
-BPF_MOD 0x90 0 dst = (src != 0) ? (dst % src) : dst
-BPF_SMOD 0x90 1 dst = (src != 0) ? (dst s% src) : dst
-BPF_XOR 0xa0 0 dst ^= src
-BPF_MOV 0xb0 0 dst = src
-BPF_MOVSX 0xb0 8/16/32 dst = (s8,s16,s32)src
-BPF_ARSH 0xc0 0 :term:`sign extending<Sign Extend>` dst >>= (src & mask)
-BPF_END 0xd0 0 byte swap operations (see `Byte swap instructions`_ below)
-========= ===== ======= ==========================================================
+``ALU`` uses 32-bit wide operands while ``ALU64`` uses 64-bit wide operands for
+otherwise identical operations. ``ALU64`` instructions belong to the
+base64 conformance group unless noted otherwise.
+The 'code' field encodes the operation as below, where 'src' refers to the
+the source operand and 'dst' refers to the value of the destination
+register.
+
+.. table:: Arithmetic instructions
+
+ ===== ===== ======= ===================================================================================
+ name code offset description
+ ===== ===== ======= ===================================================================================
+ ADD 0x0 0 dst += src
+ SUB 0x1 0 dst -= src
+ MUL 0x2 0 dst \*= src
+ DIV 0x3 0 dst = (src != 0) ? (dst / src) : 0
+ SDIV 0x3 1 dst = (src == 0) ? 0 : ((src == -1 && dst == LLONG_MIN) ? LLONG_MIN : (dst s/ src))
+ OR 0x4 0 dst \|= src
+ AND 0x5 0 dst &= src
+ LSH 0x6 0 dst <<= (src & mask)
+ RSH 0x7 0 dst >>= (src & mask)
+ NEG 0x8 0 dst = -dst
+ MOD 0x9 0 dst = (src != 0) ? (dst % src) : dst
+ SMOD 0x9 1 dst = (src == 0) ? dst : ((src == -1 && dst == LLONG_MIN) ? 0: (dst s% src))
+ XOR 0xa 0 dst ^= src
+ MOV 0xb 0 dst = src
+ MOVSX 0xb 8/16/32 dst = (s8,s16,s32)src
+ ARSH 0xc 0 :term:`sign extending<Sign Extend>` dst >>= (src & mask)
+ END 0xd 0 byte swap operations (see `Byte swap instructions`_ below)
+ ===== ===== ======= ===================================================================================
Underflow and overflow are allowed during arithmetic operations, meaning
the 64-bit or 32-bit value will wrap. If BPF program execution would
result in division by zero, the destination register is instead set to zero.
-If execution would result in modulo by zero, for ``BPF_ALU64`` the value of
-the destination register is unchanged whereas for ``BPF_ALU`` the upper
-32 bits of the destination register are zeroed.
+Otherwise, for ``ALU64``, if execution would result in ``LLONG_MIN``
+divided by -1, the destination register is instead set to ``LLONG_MIN``. For
+``ALU``, if execution would result in ``INT_MIN`` divided by -1, the
+destination register is instead set to ``INT_MIN``.
+
+If execution would result in modulo by zero, for ``ALU64`` the value of
+the destination register is unchanged whereas for ``ALU`` the upper
+32 bits of the destination register are zeroed. Otherwise, for ``ALU64``,
+if execution would resuslt in ``LLONG_MIN`` modulo -1, the destination
+register is instead set to 0. For ``ALU``, if execution would result in
+``INT_MIN`` modulo -1, the destination register is instead set to 0.
-``BPF_ADD | BPF_X | BPF_ALU`` means::
+``{ADD, X, ALU}``, where 'code' = ``ADD``, 'source' = ``X``, and 'class' = ``ALU``, means::
dst = (u32) ((u32) dst + (u32) src)
where '(u32)' indicates that the upper 32 bits are zeroed.
-``BPF_ADD | BPF_X | BPF_ALU64`` means::
+``{ADD, X, ALU64}`` means::
dst = dst + src
-``BPF_XOR | BPF_K | BPF_ALU`` means::
+``{XOR, K, ALU}`` means::
- dst = (u32) dst ^ (u32) imm32
+ dst = (u32) dst ^ (u32) imm
-``BPF_XOR | BPF_K | BPF_ALU64`` means::
+``{XOR, K, ALU64}`` means::
- dst = dst ^ imm32
+ dst = dst ^ imm
-Note that most instructions have instruction offset of 0. Only three instructions
-(``BPF_SDIV``, ``BPF_SMOD``, ``BPF_MOVSX``) have a non-zero offset.
+Note that most arithmetic instructions have 'offset' set to 0. Only three instructions
+(``SDIV``, ``SMOD``, ``MOVSX``) have a non-zero 'offset'.
+Division, multiplication, and modulo operations for ``ALU`` are part
+of the "divmul32" conformance group, and division, multiplication, and
+modulo operations for ``ALU64`` are part of the "divmul64" conformance
+group.
The division and modulo operations support both unsigned and signed flavors.
-For unsigned operations (``BPF_DIV`` and ``BPF_MOD``), for ``BPF_ALU``,
-'imm' is interpreted as a 32-bit unsigned value. For ``BPF_ALU64``,
+For unsigned operations (``DIV`` and ``MOD``), for ``ALU``,
+'imm' is interpreted as a 32-bit unsigned value. For ``ALU64``,
'imm' is first :term:`sign extended<Sign Extend>` from 32 to 64 bits, and then
interpreted as a 64-bit unsigned value.
-For signed operations (``BPF_SDIV`` and ``BPF_SMOD``), for ``BPF_ALU``,
-'imm' is interpreted as a 32-bit signed value. For ``BPF_ALU64``, 'imm'
+For signed operations (``SDIV`` and ``SMOD``), for ``ALU``,
+'imm' is interpreted as a 32-bit signed value. For ``ALU64``, 'imm'
is first :term:`sign extended<Sign Extend>` from 32 to 64 bits, and then
interpreted as a 64-bit signed value.
Note that there are varying definitions of the signed modulo operation
when the dividend or divisor are negative, where implementations often
vary by language such that Python, Ruby, etc. differ from C, Go, Java,
-etc. This specification requires that signed modulo use truncated division
-(where -13 % 3 == -1) as implemented in C, Go, etc.:
+etc. This specification requires that signed modulo MUST use truncated division
+(where -13 % 3 == -1) as implemented in C, Go, etc.::
a % n = a - n * trunc(a / n)
-The ``BPF_MOVSX`` instruction does a move operation with sign extension.
-``BPF_ALU | BPF_MOVSX`` :term:`sign extends<Sign Extend>` 8-bit and 16-bit operands into 32
-bit operands, and zeroes the remaining upper 32 bits.
-``BPF_ALU64 | BPF_MOVSX`` :term:`sign extends<Sign Extend>` 8-bit, 16-bit, and 32-bit
-operands into 64 bit operands.
+The ``MOVSX`` instruction does a move operation with sign extension.
+``{MOVSX, X, ALU}`` :term:`sign extends<Sign Extend>` 8-bit and 16-bit operands into
+32-bit operands, and zeroes the remaining upper 32 bits.
+``{MOVSX, X, ALU64}`` :term:`sign extends<Sign Extend>` 8-bit, 16-bit, and 32-bit
+operands into 64-bit operands. Unlike other arithmetic instructions,
+``MOVSX`` is only defined for register source operands (``X``).
+
+``{MOV, K, ALU64}`` means::
+
+ dst = (s64)imm
+
+``{MOV, X, ALU}`` means::
+
+ dst = (u32)src
+
+``{MOVSX, X, ALU}`` with 'offset' 8 means::
+
+ dst = (u32)(s32)(s8)src
+
+
+The ``NEG`` instruction is only defined when the source bit is clear
+(``K``).
Shift operations use a mask of 0x3F (63) for 64-bit operations and 0x1F (31)
for 32-bit operations.
@@ -303,43 +435,47 @@ for 32-bit operations.
Byte swap instructions
----------------------
-The byte swap instructions use instruction classes of ``BPF_ALU`` and ``BPF_ALU64``
-and a 4-bit 'code' field of ``BPF_END``.
+The byte swap instructions use instruction classes of ``ALU`` and ``ALU64``
+and a 4-bit 'code' field of ``END``.
The byte swap instructions operate on the destination register
only and do not use a separate source register or immediate value.
-For ``BPF_ALU``, the 1-bit source operand field in the opcode is used to
+For ``ALU``, the 1-bit source operand field in the opcode is used to
select what byte order the operation converts from or to. For
-``BPF_ALU64``, the 1-bit source operand field in the opcode is reserved
-and must be set to 0.
+``ALU64``, the 1-bit source operand field in the opcode is reserved
+and MUST be set to 0.
+
+.. table:: Byte swap instructions
-========= ========= ===== =================================================
-class source value description
-========= ========= ===== =================================================
-BPF_ALU BPF_TO_LE 0x00 convert between host byte order and little endian
-BPF_ALU BPF_TO_BE 0x08 convert between host byte order and big endian
-BPF_ALU64 Reserved 0x00 do byte swap unconditionally
-========= ========= ===== =================================================
+ ===== ======== ===== =================================================
+ class source value description
+ ===== ======== ===== =================================================
+ ALU LE 0 convert between host byte order and little endian
+ ALU BE 1 convert between host byte order and big endian
+ ALU64 Reserved 0 do byte swap unconditionally
+ ===== ======== ===== =================================================
The 'imm' field encodes the width of the swap operations. The following widths
-are supported: 16, 32 and 64.
+are supported: 16, 32 and 64. Width 64 operations belong to the base64
+conformance group and other swap operations belong to the base32
+conformance group.
Examples:
-``BPF_ALU | BPF_TO_LE | BPF_END`` with imm = 16/32/64 means::
+``{END, LE, ALU}`` with 'imm' = 16/32/64 means::
- dst = htole16(dst)
- dst = htole32(dst)
- dst = htole64(dst)
+ dst = le16(dst)
+ dst = le32(dst)
+ dst = le64(dst)
-``BPF_ALU | BPF_TO_BE | BPF_END`` with imm = 16/32/64 means::
+``{END, BE, ALU}`` with 'imm' = 16/32/64 means::
- dst = htobe16(dst)
- dst = htobe32(dst)
- dst = htobe64(dst)
+ dst = be16(dst)
+ dst = be32(dst)
+ dst = be64(dst)
-``BPF_ALU64 | BPF_TO_LE | BPF_END`` with imm = 16/32/64 means::
+``{END, TO, ALU64}`` with 'imm' = 16/32/64 means::
dst = bswap16(dst)
dst = bswap32(dst)
@@ -348,146 +484,175 @@ Examples:
Jump instructions
-----------------
-``BPF_JMP32`` uses 32-bit wide operands while ``BPF_JMP`` uses 64-bit wide operands for
-otherwise identical operations.
+``JMP32`` uses 32-bit wide operands and indicates the base32
+conformance group, while ``JMP`` uses 64-bit wide operands for
+otherwise identical operations, and indicates the base64 conformance
+group unless otherwise specified.
The 'code' field encodes the operation as below:
-======== ===== === =========================================== =========================================
-code value src description notes
-======== ===== === =========================================== =========================================
-BPF_JA 0x0 0x0 PC += offset BPF_JMP class
-BPF_JA 0x0 0x0 PC += imm BPF_JMP32 class
-BPF_JEQ 0x1 any PC += offset if dst == src
-BPF_JGT 0x2 any PC += offset if dst > src unsigned
-BPF_JGE 0x3 any PC += offset if dst >= src unsigned
-BPF_JSET 0x4 any PC += offset if dst & src
-BPF_JNE 0x5 any PC += offset if dst != src
-BPF_JSGT 0x6 any PC += offset if dst > src signed
-BPF_JSGE 0x7 any PC += offset if dst >= src signed
-BPF_CALL 0x8 0x0 call helper function by address see `Helper functions`_
-BPF_CALL 0x8 0x1 call PC += imm see `Program-local functions`_
-BPF_CALL 0x8 0x2 call helper function by BTF ID see `Helper functions`_
-BPF_EXIT 0x9 0x0 return BPF_JMP only
-BPF_JLT 0xa any PC += offset if dst < src unsigned
-BPF_JLE 0xb any PC += offset if dst <= src unsigned
-BPF_JSLT 0xc any PC += offset if dst < src signed
-BPF_JSLE 0xd any PC += offset if dst <= src signed
-======== ===== === =========================================== =========================================
-
-The BPF program needs to store the return value into register R0 before doing a
-``BPF_EXIT``.
+.. table:: Jump instructions
+
+ ======== ===== ======= ================================= ===================================================
+ code value src_reg description notes
+ ======== ===== ======= ================================= ===================================================
+ JA 0x0 0x0 PC += offset {JA, K, JMP} only
+ JA 0x0 0x0 PC += imm {JA, K, JMP32} only
+ JEQ 0x1 any PC += offset if dst == src
+ JGT 0x2 any PC += offset if dst > src unsigned
+ JGE 0x3 any PC += offset if dst >= src unsigned
+ JSET 0x4 any PC += offset if dst & src
+ JNE 0x5 any PC += offset if dst != src
+ JSGT 0x6 any PC += offset if dst > src signed
+ JSGE 0x7 any PC += offset if dst >= src signed
+ CALL 0x8 0x0 call helper function by static ID {CALL, K, JMP} only, see `Helper functions`_
+ CALL 0x8 0x1 call PC += imm {CALL, K, JMP} only, see `Program-local functions`_
+ CALL 0x8 0x2 call helper function by BTF ID {CALL, K, JMP} only, see `Helper functions`_
+ EXIT 0x9 0x0 return {CALL, K, JMP} only
+ JLT 0xa any PC += offset if dst < src unsigned
+ JLE 0xb any PC += offset if dst <= src unsigned
+ JSLT 0xc any PC += offset if dst < src signed
+ JSLE 0xd any PC += offset if dst <= src signed
+ ======== ===== ======= ================================= ===================================================
+
+where 'PC' denotes the program counter, and the offset to increment by
+is in units of 64-bit instructions relative to the instruction following
+the jump instruction. Thus 'PC += 1' skips execution of the next
+instruction if it's a basic instruction or results in undefined behavior
+if the next instruction is a 128-bit wide instruction.
Example:
-``BPF_JSGE | BPF_X | BPF_JMP32`` (0x7e) means::
+``{JSGE, X, JMP32}`` means::
if (s32)dst s>= (s32)src goto +offset
where 's>=' indicates a signed '>=' comparison.
-``BPF_JA | BPF_K | BPF_JMP32`` (0x06) means::
+``{JLE, K, JMP}`` means::
+
+ if dst <= (u64)(s64)imm goto +offset
+
+``{JA, K, JMP32}`` means::
gotol +imm
-where 'imm' means the branch offset comes from insn 'imm' field.
+where 'imm' means the branch offset comes from the 'imm' field.
-Note that there are two flavors of ``BPF_JA`` instructions. The
-``BPF_JMP`` class permits a 16-bit jump offset specified by the 'offset'
-field, whereas the ``BPF_JMP32`` class permits a 32-bit jump offset
+Note that there are two flavors of ``JA`` instructions. The
+``JMP`` class permits a 16-bit jump offset specified by the 'offset'
+field, whereas the ``JMP32`` class permits a 32-bit jump offset
specified by the 'imm' field. A > 16-bit conditional jump may be
converted to a < 16-bit conditional jump plus a 32-bit unconditional
jump.
+All ``CALL`` and ``JA`` instructions belong to the
+base32 conformance group.
+
Helper functions
~~~~~~~~~~~~~~~~
Helper functions are a concept whereby BPF programs can call into a
set of function calls exposed by the underlying platform.
-Historically, each helper function was identified by an address
-encoded in the imm field. The available helper functions may differ
-for each program type, but address values are unique across all program types.
+Historically, each helper function was identified by a static ID
+encoded in the 'imm' field. Further documentation of helper functions
+is outside the scope of this document and standardization is left for
+future work, but use is widely deployed and more information can be
+found in platform-specific documentation (e.g., Linux kernel documentation).
Platforms that support the BPF Type Format (BTF) support identifying
-a helper function by a BTF ID encoded in the imm field, where the BTF ID
-identifies the helper name and type.
+a helper function by a BTF ID encoded in the 'imm' field, where the BTF ID
+identifies the helper name and type. Further documentation of BTF
+is outside the scope of this document and standardization is left for
+future work, but use is widely deployed and more information can be
+found in platform-specific documentation (e.g., Linux kernel documentation).
Program-local functions
~~~~~~~~~~~~~~~~~~~~~~~
Program-local functions are functions exposed by the same BPF program as the
-caller, and are referenced by offset from the call instruction, similar to
-``BPF_JA``. The offset is encoded in the imm field of the call instruction.
-A ``BPF_EXIT`` within the program-local function will return to the caller.
+caller, and are referenced by offset from the instruction following the call
+instruction, similar to ``JA``. The offset is encoded in the 'imm' field of
+the call instruction. An ``EXIT`` within the program-local function will
+return to the caller.
Load and store instructions
===========================
-For load and store instructions (``BPF_LD``, ``BPF_LDX``, ``BPF_ST``, and ``BPF_STX``), the
-8-bit 'opcode' field is divided as:
-
-============ ====== =================
-3 bits (MSB) 2 bits 3 bits (LSB)
-============ ====== =================
-mode size instruction class
-============ ====== =================
-
-The mode modifier is one of:
-
- ============= ===== ==================================== =============
- mode modifier value description reference
- ============= ===== ==================================== =============
- BPF_IMM 0x00 64-bit immediate instructions `64-bit immediate instructions`_
- BPF_ABS 0x20 legacy BPF packet access (absolute) `Legacy BPF Packet access instructions`_
- BPF_IND 0x40 legacy BPF packet access (indirect) `Legacy BPF Packet access instructions`_
- BPF_MEM 0x60 regular load and store operations `Regular load and store operations`_
- BPF_MEMSX 0x80 sign-extension load operations `Sign-extension load operations`_
- BPF_ATOMIC 0xc0 atomic operations `Atomic operations`_
- ============= ===== ==================================== =============
-
-The size modifier is one of:
-
- ============= ===== =====================
- size modifier value description
- ============= ===== =====================
- BPF_W 0x00 word (4 bytes)
- BPF_H 0x08 half word (2 bytes)
- BPF_B 0x10 byte
- BPF_DW 0x18 double word (8 bytes)
- ============= ===== =====================
+For load and store instructions (``LD``, ``LDX``, ``ST``, and ``STX``), the
+8-bit 'opcode' field is divided as follows::
+
+ +-+-+-+-+-+-+-+-+
+ |mode |sz |class|
+ +-+-+-+-+-+-+-+-+
+
+**mode**
+ The mode modifier is one of:
+
+ .. table:: Mode modifier
+
+ ============= ===== ==================================== =============
+ mode modifier value description reference
+ ============= ===== ==================================== =============
+ IMM 0 64-bit immediate instructions `64-bit immediate instructions`_
+ ABS 1 legacy BPF packet access (absolute) `Legacy BPF Packet access instructions`_
+ IND 2 legacy BPF packet access (indirect) `Legacy BPF Packet access instructions`_
+ MEM 3 regular load and store operations `Regular load and store operations`_
+ MEMSX 4 sign-extension load operations `Sign-extension load operations`_
+ ATOMIC 6 atomic operations `Atomic operations`_
+ ============= ===== ==================================== =============
+
+**sz (size)**
+ The size modifier is one of:
+
+ .. table:: Size modifier
+
+ ==== ===== =====================
+ size value description
+ ==== ===== =====================
+ W 0 word (4 bytes)
+ H 1 half word (2 bytes)
+ B 2 byte
+ DW 3 double word (8 bytes)
+ ==== ===== =====================
+
+ Instructions using ``DW`` belong to the base64 conformance group.
+
+**class**
+ The instruction class (see `Instruction classes`_)
Regular load and store operations
---------------------------------
-The ``BPF_MEM`` mode modifier is used to encode regular load and store
+The ``MEM`` mode modifier is used to encode regular load and store
instructions that transfer data between a register and memory.
-``BPF_MEM | <size> | BPF_STX`` means::
+``{MEM, <size>, STX}`` means::
*(size *) (dst + offset) = src
-``BPF_MEM | <size> | BPF_ST`` means::
+``{MEM, <size>, ST}`` means::
- *(size *) (dst + offset) = imm32
+ *(size *) (dst + offset) = imm
-``BPF_MEM | <size> | BPF_LDX`` means::
+``{MEM, <size>, LDX}`` means::
dst = *(unsigned size *) (src + offset)
-Where size is one of: ``BPF_B``, ``BPF_H``, ``BPF_W``, or ``BPF_DW`` and
-'unsigned size' is one of u8, u16, u32 or u64.
+Where '<size>' is one of: ``B``, ``H``, ``W``, or ``DW``, and
+'unsigned size' is one of: u8, u16, u32, or u64.
Sign-extension load operations
------------------------------
-The ``BPF_MEMSX`` mode modifier is used to encode :term:`sign-extension<Sign Extend>` load
+The ``MEMSX`` mode modifier is used to encode :term:`sign-extension<Sign Extend>` load
instructions that transfer data between a register and memory.
-``BPF_MEMSX | <size> | BPF_LDX`` means::
+``{MEMSX, <size>, LDX}`` means::
dst = *(signed size *) (src + offset)
-Where size is one of: ``BPF_B``, ``BPF_H`` or ``BPF_W``, and
-'signed size' is one of s8, s16 or s32.
+Where '<size>' is one of: ``B``, ``H``, or ``W``, and
+'signed size' is one of: s8, s16, or s32.
Atomic operations
-----------------
@@ -497,54 +662,60 @@ interrupted or corrupted by other access to the same memory region
by other BPF programs or means outside of this specification.
All atomic operations supported by BPF are encoded as store operations
-that use the ``BPF_ATOMIC`` mode modifier as follows:
+that use the ``ATOMIC`` mode modifier as follows:
-* ``BPF_ATOMIC | BPF_W | BPF_STX`` for 32-bit operations
-* ``BPF_ATOMIC | BPF_DW | BPF_STX`` for 64-bit operations
+* ``{ATOMIC, W, STX}`` for 32-bit operations, which are
+ part of the "atomic32" conformance group.
+* ``{ATOMIC, DW, STX}`` for 64-bit operations, which are
+ part of the "atomic64" conformance group.
* 8-bit and 16-bit wide atomic operations are not supported.
The 'imm' field is used to encode the actual atomic operation.
Simple atomic operation use a subset of the values defined to encode
arithmetic operations in the 'imm' field to encode the atomic operation:
-======== ===== ===========
-imm value description
-======== ===== ===========
-BPF_ADD 0x00 atomic add
-BPF_OR 0x40 atomic or
-BPF_AND 0x50 atomic and
-BPF_XOR 0xa0 atomic xor
-======== ===== ===========
+.. table:: Simple atomic operations
+ ======== ===== ===========
+ imm value description
+ ======== ===== ===========
+ ADD 0x00 atomic add
+ OR 0x40 atomic or
+ AND 0x50 atomic and
+ XOR 0xa0 atomic xor
+ ======== ===== ===========
-``BPF_ATOMIC | BPF_W | BPF_STX`` with 'imm' = BPF_ADD means::
+
+``{ATOMIC, W, STX}`` with 'imm' = ADD means::
*(u32 *)(dst + offset) += src
-``BPF_ATOMIC | BPF_DW | BPF_STX`` with 'imm' = BPF ADD means::
+``{ATOMIC, DW, STX}`` with 'imm' = ADD means::
*(u64 *)(dst + offset) += src
In addition to the simple atomic operations, there also is a modifier and
two complex atomic operations:
-=========== ================ ===========================
-imm value description
-=========== ================ ===========================
-BPF_FETCH 0x01 modifier: return old value
-BPF_XCHG 0xe0 | BPF_FETCH atomic exchange
-BPF_CMPXCHG 0xf0 | BPF_FETCH atomic compare and exchange
-=========== ================ ===========================
+.. table:: Complex atomic operations
+
+ =========== ================ ===========================
+ imm value description
+ =========== ================ ===========================
+ FETCH 0x01 modifier: return old value
+ XCHG 0xe0 | FETCH atomic exchange
+ CMPXCHG 0xf0 | FETCH atomic compare and exchange
+ =========== ================ ===========================
-The ``BPF_FETCH`` modifier is optional for simple atomic operations, and
-always set for the complex atomic operations. If the ``BPF_FETCH`` flag
+The ``FETCH`` modifier is optional for simple atomic operations, and
+always set for the complex atomic operations. If the ``FETCH`` flag
is set, then the operation also overwrites ``src`` with the value that
was in memory before it was modified.
-The ``BPF_XCHG`` operation atomically exchanges ``src`` with the value
+The ``XCHG`` operation atomically exchanges ``src`` with the value
addressed by ``dst + offset``.
-The ``BPF_CMPXCHG`` operation atomically compares the value addressed by
+The ``CMPXCHG`` operation atomically compares the value addressed by
``dst + offset`` with ``R0``. If they match, the value addressed by
``dst + offset`` is replaced with ``src``. In either case, the
value that was at ``dst + offset`` before the operation is zero-extended
@@ -553,25 +724,27 @@ and loaded back to ``R0``.
64-bit immediate instructions
-----------------------------
-Instructions with the ``BPF_IMM`` 'mode' modifier use the wide instruction
-encoding defined in `Instruction encoding`_, and use the 'src' field of the
+Instructions with the ``IMM`` 'mode' modifier use the wide instruction
+encoding defined in `Instruction encoding`_, and use the 'src_reg' field of the
basic instruction to hold an opcode subtype.
-The following table defines a set of ``BPF_IMM | BPF_DW | BPF_LD`` instructions
-with opcode subtypes in the 'src' field, using new terms such as "map"
+The following table defines a set of ``{IMM, DW, LD}`` instructions
+with opcode subtypes in the 'src_reg' field, using new terms such as "map"
defined further below:
-========================= ====== === ========================================= =========== ==============
-opcode construction opcode src pseudocode imm type dst type
-========================= ====== === ========================================= =========== ==============
-BPF_IMM | BPF_DW | BPF_LD 0x18 0x0 dst = imm64 integer integer
-BPF_IMM | BPF_DW | BPF_LD 0x18 0x1 dst = map_by_fd(imm) map fd map
-BPF_IMM | BPF_DW | BPF_LD 0x18 0x2 dst = map_val(map_by_fd(imm)) + next_imm map fd data pointer
-BPF_IMM | BPF_DW | BPF_LD 0x18 0x3 dst = var_addr(imm) variable id data pointer
-BPF_IMM | BPF_DW | BPF_LD 0x18 0x4 dst = code_addr(imm) integer code pointer
-BPF_IMM | BPF_DW | BPF_LD 0x18 0x5 dst = map_by_idx(imm) map index map
-BPF_IMM | BPF_DW | BPF_LD 0x18 0x6 dst = map_val(map_by_idx(imm)) + next_imm map index data pointer
-========================= ====== === ========================================= =========== ==============
+.. table:: 64-bit immediate instructions
+
+ ======= ========================================= =========== ==============
+ src_reg pseudocode imm type dst type
+ ======= ========================================= =========== ==============
+ 0x0 dst = (next_imm << 32) | imm integer integer
+ 0x1 dst = map_by_fd(imm) map fd map
+ 0x2 dst = map_val(map_by_fd(imm)) + next_imm map fd data address
+ 0x3 dst = var_addr(imm) variable id data address
+ 0x4 dst = code_addr(imm) integer code address
+ 0x5 dst = map_by_idx(imm) map index map
+ 0x6 dst = map_val(map_by_idx(imm)) + next_imm map index data address
+ ======= ========================================= =========== ==============
where
@@ -609,5 +782,9 @@ Legacy BPF Packet access instructions
-------------------------------------
BPF previously introduced special instructions for access to packet data that were
-carried over from classic BPF. However, these instructions are
-deprecated and should no longer be used.
+carried over from classic BPF. These instructions used an instruction
+class of ``LD``, a size modifier of ``W``, ``H``, or ``B``, and a
+mode modifier of ``ABS`` or ``IND``. The 'dst_reg' and 'offset' fields were
+set to zero, and 'src_reg' was set to zero for ``ABS``. However, these
+instructions are deprecated and SHOULD no longer be used. All legacy packet
+access instructions belong to the "packet" conformance group.
diff --git a/Documentation/bpf/verifier.rst b/Documentation/bpf/verifier.rst
index f0ec19db301c..510d15bc697b 100644
--- a/Documentation/bpf/verifier.rst
+++ b/Documentation/bpf/verifier.rst
@@ -347,270 +347,6 @@ However, only the value of register ``r1`` is important to successfully finish
verification. The goal of the liveness tracking algorithm is to spot this fact
and figure out that both states are actually equivalent.
-Data structures
-~~~~~~~~~~~~~~~
-
-Liveness is tracked using the following data structures::
-
- enum bpf_reg_liveness {
- REG_LIVE_NONE = 0,
- REG_LIVE_READ32 = 0x1,
- REG_LIVE_READ64 = 0x2,
- REG_LIVE_READ = REG_LIVE_READ32 | REG_LIVE_READ64,
- REG_LIVE_WRITTEN = 0x4,
- REG_LIVE_DONE = 0x8,
- };
-
- struct bpf_reg_state {
- ...
- struct bpf_reg_state *parent;
- ...
- enum bpf_reg_liveness live;
- ...
- };
-
- struct bpf_stack_state {
- struct bpf_reg_state spilled_ptr;
- ...
- };
-
- struct bpf_func_state {
- struct bpf_reg_state regs[MAX_BPF_REG];
- ...
- struct bpf_stack_state *stack;
- }
-
- struct bpf_verifier_state {
- struct bpf_func_state *frame[MAX_CALL_FRAMES];
- struct bpf_verifier_state *parent;
- ...
- }
-
-* ``REG_LIVE_NONE`` is an initial value assigned to ``->live`` fields upon new
- verifier state creation;
-
-* ``REG_LIVE_WRITTEN`` means that the value of the register (or stack slot) is
- defined by some instruction verified between this verifier state's parent and
- verifier state itself;
-
-* ``REG_LIVE_READ{32,64}`` means that the value of the register (or stack slot)
- is read by a some child state of this verifier state;
-
-* ``REG_LIVE_DONE`` is a marker used by ``clean_verifier_state()`` to avoid
- processing same verifier state multiple times and for some sanity checks;
-
-* ``->live`` field values are formed by combining ``enum bpf_reg_liveness``
- values using bitwise or.
-
-Register parentage chains
-~~~~~~~~~~~~~~~~~~~~~~~~~
-
-In order to propagate information between parent and child states, a *register
-parentage chain* is established. Each register or stack slot is linked to a
-corresponding register or stack slot in its parent state via a ``->parent``
-pointer. This link is established upon state creation in ``is_state_visited()``
-and might be modified by ``set_callee_state()`` called from
-``__check_func_call()``.
-
-The rules for correspondence between registers / stack slots are as follows:
-
-* For the current stack frame, registers and stack slots of the new state are
- linked to the registers and stack slots of the parent state with the same
- indices.
-
-* For the outer stack frames, only caller saved registers (r6-r9) and stack
- slots are linked to the registers and stack slots of the parent state with the
- same indices.
-
-* When function call is processed a new ``struct bpf_func_state`` instance is
- allocated, it encapsulates a new set of registers and stack slots. For this
- new frame, parent links for r6-r9 and stack slots are set to nil, parent links
- for r1-r5 are set to match caller r1-r5 parent links.
-
-This could be illustrated by the following diagram (arrows stand for
-``->parent`` pointers)::
-
- ... ; Frame #0, some instructions
- --- checkpoint #0 ---
- 1 : r6 = 42 ; Frame #0
- --- checkpoint #1 ---
- 2 : call foo() ; Frame #0
- ... ; Frame #1, instructions from foo()
- --- checkpoint #2 ---
- ... ; Frame #1, instructions from foo()
- --- checkpoint #3 ---
- exit ; Frame #1, return from foo()
- 3 : r1 = r6 ; Frame #0 <- current state
-
- +-------------------------------+-------------------------------+
- | Frame #0 | Frame #1 |
- Checkpoint +-------------------------------+-------------------------------+
- #0 | r0 | r1-r5 | r6-r9 | fp-8 ... |
- +-------------------------------+
- ^ ^ ^ ^
- | | | |
- Checkpoint +-------------------------------+
- #1 | r0 | r1-r5 | r6-r9 | fp-8 ... |
- +-------------------------------+
- ^ ^ ^
- |_______|_______|_______________
- | | |
- nil nil | | | nil nil
- | | | | | | |
- Checkpoint +-------------------------------+-------------------------------+
- #2 | r0 | r1-r5 | r6-r9 | fp-8 ... | r0 | r1-r5 | r6-r9 | fp-8 ... |
- +-------------------------------+-------------------------------+
- ^ ^ ^ ^ ^
- nil nil | | | | |
- | | | | | | |
- Checkpoint +-------------------------------+-------------------------------+
- #3 | r0 | r1-r5 | r6-r9 | fp-8 ... | r0 | r1-r5 | r6-r9 | fp-8 ... |
- +-------------------------------+-------------------------------+
- ^ ^
- nil nil | |
- | | | |
- Current +-------------------------------+
- state | r0 | r1-r5 | r6-r9 | fp-8 ... |
- +-------------------------------+
- \
- r6 read mark is propagated via these links
- all the way up to checkpoint #1.
- The checkpoint #1 contains a write mark for r6
- because of instruction (1), thus read propagation
- does not reach checkpoint #0 (see section below).
-
-Liveness marks tracking
-~~~~~~~~~~~~~~~~~~~~~~~
-
-For each processed instruction, the verifier tracks read and written registers
-and stack slots. The main idea of the algorithm is that read marks propagate
-back along the state parentage chain until they hit a write mark, which 'screens
-off' earlier states from the read. The information about reads is propagated by
-function ``mark_reg_read()`` which could be summarized as follows::
-
- mark_reg_read(struct bpf_reg_state *state, ...):
- parent = state->parent
- while parent:
- if state->live & REG_LIVE_WRITTEN:
- break
- if parent->live & REG_LIVE_READ64:
- break
- parent->live |= REG_LIVE_READ64
- state = parent
- parent = state->parent
-
-Notes:
-
-* The read marks are applied to the **parent** state while write marks are
- applied to the **current** state. The write mark on a register or stack slot
- means that it is updated by some instruction in the straight-line code leading
- from the parent state to the current state.
-
-* Details about REG_LIVE_READ32 are omitted.
-
-* Function ``propagate_liveness()`` (see section :ref:`read_marks_for_cache_hits`)
- might override the first parent link. Please refer to the comments in the
- ``propagate_liveness()`` and ``mark_reg_read()`` source code for further
- details.
-
-Because stack writes could have different sizes ``REG_LIVE_WRITTEN`` marks are
-applied conservatively: stack slots are marked as written only if write size
-corresponds to the size of the register, e.g. see function ``save_register_state()``.
-
-Consider the following example::
-
- 0: (*u64)(r10 - 8) = 0 ; define 8 bytes of fp-8
- --- checkpoint #0 ---
- 1: (*u32)(r10 - 8) = 1 ; redefine lower 4 bytes
- 2: r1 = (*u32)(r10 - 8) ; read lower 4 bytes defined at (1)
- 3: r2 = (*u32)(r10 - 4) ; read upper 4 bytes defined at (0)
-
-As stated above, the write at (1) does not count as ``REG_LIVE_WRITTEN``. Should
-it be otherwise, the algorithm above wouldn't be able to propagate the read mark
-from (3) to checkpoint #0.
-
-Once the ``BPF_EXIT`` instruction is reached ``update_branch_counts()`` is
-called to update the ``->branches`` counter for each verifier state in a chain
-of parent verifier states. When the ``->branches`` counter reaches zero the
-verifier state becomes a valid entry in a set of cached verifier states.
-
-Each entry of the verifier states cache is post-processed by a function
-``clean_live_states()``. This function marks all registers and stack slots
-without ``REG_LIVE_READ{32,64}`` marks as ``NOT_INIT`` or ``STACK_INVALID``.
-Registers/stack slots marked in this way are ignored in function ``stacksafe()``
-called from ``states_equal()`` when a state cache entry is considered for
-equivalence with a current state.
-
-Now it is possible to explain how the example from the beginning of the section
-works::
-
- 0: call bpf_get_prandom_u32()
- 1: r1 = 0
- 2: if r0 == 0 goto +1
- 3: r0 = 1
- --- checkpoint[0] ---
- 4: r0 = r1
- 5: exit
-
-* At instruction #2 branching point is reached and state ``{ r0 == 0, r1 == 0, pc == 4 }``
- is pushed to states processing queue (pc stands for program counter).
-
-* At instruction #4:
-
- * ``checkpoint[0]`` states cache entry is created: ``{ r0 == 1, r1 == 0, pc == 4 }``;
- * ``checkpoint[0].r0`` is marked as written;
- * ``checkpoint[0].r1`` is marked as read;
-
-* At instruction #5 exit is reached and ``checkpoint[0]`` can now be processed
- by ``clean_live_states()``. After this processing ``checkpoint[0].r0`` has a
- read mark and all other registers and stack slots are marked as ``NOT_INIT``
- or ``STACK_INVALID``
-
-* The state ``{ r0 == 0, r1 == 0, pc == 4 }`` is popped from the states queue
- and is compared against a cached state ``{ r1 == 0, pc == 4 }``, the states
- are considered equivalent.
-
-.. _read_marks_for_cache_hits:
-
-Read marks propagation for cache hits
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Another point is the handling of read marks when a previously verified state is
-found in the states cache. Upon cache hit verifier must behave in the same way
-as if the current state was verified to the program exit. This means that all
-read marks, present on registers and stack slots of the cached state, must be
-propagated over the parentage chain of the current state. Example below shows
-why this is important. Function ``propagate_liveness()`` handles this case.
-
-Consider the following state parentage chain (S is a starting state, A-E are
-derived states, -> arrows show which state is derived from which)::
-
- r1 read
- <------------- A[r1] == 0
- C[r1] == 0
- S ---> A ---> B ---> exit E[r1] == 1
- |
- ` ---> C ---> D
- |
- ` ---> E ^
- |___ suppose all these
- ^ states are at insn #Y
- |
- suppose all these
- states are at insn #X
-
-* Chain of states ``S -> A -> B -> exit`` is verified first.
-
-* While ``B -> exit`` is verified, register ``r1`` is read and this read mark is
- propagated up to state ``A``.
-
-* When chain of states ``C -> D`` is verified the state ``D`` turns out to be
- equivalent to state ``B``.
-
-* The read mark for ``r1`` has to be propagated to state ``C``, otherwise state
- ``C`` might get mistakenly marked as equivalent to state ``E`` even though
- values for register ``r1`` differ between ``C`` and ``E``.
-
Understanding eBPF verifier messages
====================================
diff --git a/Documentation/cdrom/cdrom-standard.rst b/Documentation/cdrom/cdrom-standard.rst
index 7964fe134277..b97a4e9b9bd3 100644
--- a/Documentation/cdrom/cdrom-standard.rst
+++ b/Documentation/cdrom/cdrom-standard.rst
@@ -217,7 +217,7 @@ current *struct* is::
int (*media_changed)(struct cdrom_device_info *, int);
int (*tray_move)(struct cdrom_device_info *, int);
int (*lock_door)(struct cdrom_device_info *, int);
- int (*select_speed)(struct cdrom_device_info *, int);
+ int (*select_speed)(struct cdrom_device_info *, unsigned long);
int (*get_last_session) (struct cdrom_device_info *,
struct cdrom_multisession *);
int (*get_mcn)(struct cdrom_device_info *, struct cdrom_mcn *);
@@ -273,7 +273,6 @@ The drive-specific, minor-like information that is registered with
__u8 media_written; /* dirty flag, DVD+RW bookkeeping */
unsigned short mmc3_profile; /* current MMC3 profile */
int for_data; /* unknown:TBD */
- int (*exit)(struct cdrom_device_info *);/* unknown:TBD */
int mrw_mode_page; /* which MRW mode page is in use */
};
@@ -396,7 +395,7 @@ action need be taken, and the return value should be 0.
::
- int select_speed(struct cdrom_device_info *cdi, int speed)
+ int select_speed(struct cdrom_device_info *cdi, unsigned long speed)
Some CD-ROM drives are capable of changing their head-speed. There
are several reasons for changing the speed of a CD-ROM drive. Badly
diff --git a/Documentation/cdrom/index.rst b/Documentation/cdrom/index.rst
index e9b022d70939..3ac4f716612f 100644
--- a/Documentation/cdrom/index.rst
+++ b/Documentation/cdrom/index.rst
@@ -8,7 +8,6 @@ CD-ROM
:maxdepth: 1
cdrom-standard
- packet-writing
.. only:: subproject and html
diff --git a/Documentation/cdrom/packet-writing.rst b/Documentation/cdrom/packet-writing.rst
deleted file mode 100644
index 43db58c50d29..000000000000
--- a/Documentation/cdrom/packet-writing.rst
+++ /dev/null
@@ -1,139 +0,0 @@
-==============
-Packet writing
-==============
-
-Getting started quick
----------------------
-
-- Select packet support in the block device section and UDF support in
- the file system section.
-
-- Compile and install kernel and modules, reboot.
-
-- You need the udftools package (pktsetup, mkudffs, cdrwtool).
- Download from https://github.com/pali/udftools
-
-- Grab a new CD-RW disc and format it (assuming CD-RW is hdc, substitute
- as appropriate)::
-
- # cdrwtool -d /dev/hdc -q
-
-- Setup your writer::
-
- # pktsetup dev_name /dev/hdc
-
-- Now you can mount /dev/pktcdvd/dev_name and copy files to it. Enjoy::
-
- # mount /dev/pktcdvd/dev_name /cdrom -t udf -o rw,noatime
-
-
-Packet writing for DVD-RW media
--------------------------------
-
-DVD-RW discs can be written to much like CD-RW discs if they are in
-the so called "restricted overwrite" mode. To put a disc in restricted
-overwrite mode, run::
-
- # dvd+rw-format /dev/hdc
-
-You can then use the disc the same way you would use a CD-RW disc::
-
- # pktsetup dev_name /dev/hdc
- # mount /dev/pktcdvd/dev_name /cdrom -t udf -o rw,noatime
-
-
-Packet writing for DVD+RW media
--------------------------------
-
-According to the DVD+RW specification, a drive supporting DVD+RW discs
-shall implement "true random writes with 2KB granularity", which means
-that it should be possible to put any filesystem with a block size >=
-2KB on such a disc. For example, it should be possible to do::
-
- # dvd+rw-format /dev/hdc (only needed if the disc has never
- been formatted)
- # mkudffs /dev/hdc
- # mount /dev/hdc /cdrom -t udf -o rw,noatime
-
-However, some drives don't follow the specification and expect the
-host to perform aligned writes at 32KB boundaries. Other drives do
-follow the specification, but suffer bad performance problems if the
-writes are not 32KB aligned.
-
-Both problems can be solved by using the pktcdvd driver, which always
-generates aligned writes::
-
- # dvd+rw-format /dev/hdc
- # pktsetup dev_name /dev/hdc
- # mkudffs /dev/pktcdvd/dev_name
- # mount /dev/pktcdvd/dev_name /cdrom -t udf -o rw,noatime
-
-
-Packet writing for DVD-RAM media
---------------------------------
-
-DVD-RAM discs are random writable, so using the pktcdvd driver is not
-necessary. However, using the pktcdvd driver can improve performance
-in the same way it does for DVD+RW media.
-
-
-Notes
------
-
-- CD-RW media can usually not be overwritten more than about 1000
- times, so to avoid unnecessary wear on the media, you should always
- use the noatime mount option.
-
-- Defect management (ie automatic remapping of bad sectors) has not
- been implemented yet, so you are likely to get at least some
- filesystem corruption if the disc wears out.
-
-- Since the pktcdvd driver makes the disc appear as a regular block
- device with a 2KB block size, you can put any filesystem you like on
- the disc. For example, run::
-
- # /sbin/mke2fs /dev/pktcdvd/dev_name
-
- to create an ext2 filesystem on the disc.
-
-
-Using the pktcdvd sysfs interface
----------------------------------
-
-Since Linux 2.6.20, the pktcdvd module has a sysfs interface
-and can be controlled by it. For example the "pktcdvd" tool uses
-this interface. (see http://tom.ist-im-web.de/linux/software/pktcdvd )
-
-"pktcdvd" works similar to "pktsetup", e.g.::
-
- # pktcdvd -a dev_name /dev/hdc
- # mkudffs /dev/pktcdvd/dev_name
- # mount -t udf -o rw,noatime /dev/pktcdvd/dev_name /dvdram
- # cp files /dvdram
- # umount /dvdram
- # pktcdvd -r dev_name
-
-
-For a description of the sysfs interface look into the file:
-
- Documentation/ABI/testing/sysfs-class-pktcdvd
-
-
-Using the pktcdvd debugfs interface
------------------------------------
-
-To read pktcdvd device infos in human readable form, do::
-
- # cat /sys/kernel/debug/pktcdvd/pktcdvd[0-7]/info
-
-For a description of the debugfs interface look into the file:
-
- Documentation/ABI/testing/debugfs-pktcdvd
-
-
-
-Links
------
-
-See http://fy.chalmers.se/~appro/linux/DVD+RW/ for more information
-about DVD writing.
diff --git a/Documentation/conf.py b/Documentation/conf.py
index 5830b01c5642..1ea2ae5c6276 100644
--- a/Documentation/conf.py
+++ b/Documentation/conf.py
@@ -1,25 +1,134 @@
-# -*- coding: utf-8 -*-
-#
-# The Linux Kernel documentation build configuration file, created by
-# sphinx-quickstart on Fri Feb 12 13:51:46 2016.
-#
-# This file is execfile()d with the current directory set to its
-# containing dir.
-#
-# Note that not all possible configuration values are present in this
-# autogenerated file.
-#
-# All configuration values have a default; values that are commented out
-# serve to show the default.
+# SPDX-License-Identifier: GPL-2.0-only
+# pylint: disable=C0103,C0209
+
+"""
+The Linux Kernel documentation build configuration file.
+"""
-import sys
import os
-import sphinx
import shutil
+import sys
+
+from textwrap import dedent
+
+import sphinx
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+sys.path.insert(0, os.path.abspath("sphinx"))
+
+# Minimal supported version
+needs_sphinx = "3.4.3"
+
+# Get Sphinx version
+major, minor, patch = sphinx.version_info[:3] # pylint: disable=I1101
+
+# Include_patterns were added on Sphinx 5.1
+if (major < 5) or (major == 5 and minor < 1):
+ has_include_patterns = False
+else:
+ has_include_patterns = True
+ # Include patterns that don't contain directory names, in glob format
+ include_patterns = ["**.rst"]
+
+# Location of Documentation/ directory
+doctree = os.path.abspath(".")
+
+# Exclude of patterns that don't contain directory names, in glob format.
+exclude_patterns = []
+
+# List of patterns that contain directory names in glob format.
+dyn_include_patterns = []
+dyn_exclude_patterns = ["output"]
+
+# Currently, only netlink/specs has a parser for yaml.
+# Prefer using include patterns if available, as it is faster
+if has_include_patterns:
+ dyn_include_patterns.append("netlink/specs/*.yaml")
+else:
+ dyn_exclude_patterns.append("netlink/*.yaml")
+ dyn_exclude_patterns.append("devicetree/bindings/**.yaml")
+ dyn_exclude_patterns.append("core-api/kho/bindings/**.yaml")
+
+# Properly handle directory patterns and LaTeX docs
+# -------------------------------------------------
+
+def config_init(app, config):
+ """
+ Initialize path-dependent variabled
+
+ On Sphinx, all directories are relative to what it is passed as
+ SOURCEDIR parameter for sphinx-build. Due to that, all patterns
+ that have directory names on it need to be dynamically set, after
+ converting them to a relative patch.
+
+ As Sphinx doesn't include any patterns outside SOURCEDIR, we should
+ exclude relative patterns that start with "../".
+ """
+
+ # setup include_patterns dynamically
+ if has_include_patterns:
+ for p in dyn_include_patterns:
+ full = os.path.join(doctree, p)
+
+ rel_path = os.path.relpath(full, start=app.srcdir)
+ if rel_path.startswith("../"):
+ continue
+
+ config.include_patterns.append(rel_path)
+
+ # setup exclude_patterns dynamically
+ for p in dyn_exclude_patterns:
+ full = os.path.join(doctree, p)
+
+ rel_path = os.path.relpath(full, start=app.srcdir)
+ if rel_path.startswith("../"):
+ continue
+
+ config.exclude_patterns.append(rel_path)
+
+ # LaTeX and PDF output require a list of documents with are dependent
+ # of the app.srcdir. Add them here
+
+ # Handle the case where SPHINXDIRS is used
+ if not os.path.samefile(doctree, app.srcdir):
+ # Add a tag to mark that the build is actually a subproject
+ tags.add("subproject")
+
+ # get index.rst, if it exists
+ doc = os.path.basename(app.srcdir)
+ fname = "index"
+ if os.path.exists(os.path.join(app.srcdir, fname + ".rst")):
+ latex_documents.append((fname, doc + ".tex",
+ "Linux %s Documentation" % doc.capitalize(),
+ "The kernel development community",
+ "manual"))
+ return
+
+ # When building all docs, or when a main index.rst doesn't exist, seek
+ # for it on subdirectories
+ for doc in os.listdir(app.srcdir):
+ fname = os.path.join(doc, "index")
+ if not os.path.exists(os.path.join(app.srcdir, fname + ".rst")):
+ continue
+
+ has = False
+ for l in latex_documents:
+ if l[0] == fname:
+ has = True
+ break
+
+ if not has:
+ latex_documents.append((fname, doc + ".tex",
+ "Linux %s Documentation" % doc.capitalize(),
+ "The kernel development community",
+ "manual"))
# helper
# ------
+
def have_command(cmd):
"""Search ``cmd`` in the ``PATH`` environment.
@@ -28,103 +137,89 @@ def have_command(cmd):
"""
return shutil.which(cmd) is not None
-# Get Sphinx version
-major, minor, patch = sphinx.version_info[:3]
-
-#
-# Warn about older versions that we don't want to support for much
-# longer.
-#
-if (major < 2) or (major == 2 and minor < 4):
- print('WARNING: support for Sphinx < 2.4 will be removed soon.')
-
-# If extensions (or modules to document with autodoc) are in another directory,
-# add these directories to sys.path here. If the directory is relative to the
-# documentation root, use os.path.abspath to make it absolute, like shown here.
-sys.path.insert(0, os.path.abspath('sphinx'))
-from load_config import loadConfig
# -- General configuration ------------------------------------------------
-# If your documentation needs a minimal Sphinx version, state it here.
-needs_sphinx = '2.4.4'
-
-# Add any Sphinx extension module names here, as strings. They can be
-# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
-# ones.
-extensions = ['kerneldoc', 'rstFlatTable', 'kernel_include',
- 'kfigure', 'sphinx.ext.ifconfig', 'automarkup',
- 'maintainers_include', 'sphinx.ext.autosectionlabel',
- 'kernel_abi', 'kernel_feat', 'translations']
-
-if major >= 3:
- if (major > 3) or (minor > 0 or patch >= 2):
- # Sphinx c function parser is more pedantic with regards to type
- # checking. Due to that, having macros at c:function cause problems.
- # Those needed to be scaped by using c_id_attributes[] array
- c_id_attributes = [
- # GCC Compiler types not parsed by Sphinx:
- "__restrict__",
-
- # include/linux/compiler_types.h:
- "__iomem",
- "__kernel",
- "noinstr",
- "notrace",
- "__percpu",
- "__rcu",
- "__user",
- "__force",
-
- # include/linux/compiler_attributes.h:
- "__alias",
- "__aligned",
- "__aligned_largest",
- "__always_inline",
- "__assume_aligned",
- "__cold",
- "__attribute_const__",
- "__copy",
- "__pure",
- "__designated_init",
- "__visible",
- "__printf",
- "__scanf",
- "__gnu_inline",
- "__malloc",
- "__mode",
- "__no_caller_saved_registers",
- "__noclone",
- "__nonstring",
- "__noreturn",
- "__packed",
- "__pure",
- "__section",
- "__always_unused",
- "__maybe_unused",
- "__used",
- "__weak",
- "noinline",
- "__fix_address",
- "__counted_by",
-
- # include/linux/memblock.h:
- "__init_memblock",
- "__meminit",
-
- # include/linux/init.h:
- "__init",
- "__ref",
-
- # include/linux/linkage.h:
- "asmlinkage",
-
- # include/linux/btf.h
- "__bpf_kfunc",
- ]
-
-else:
- extensions.append('cdomain')
+# Add any Sphinx extensions in alphabetic order
+extensions = [
+ "automarkup",
+ "kernel_abi",
+ "kerneldoc",
+ "kernel_feat",
+ "kernel_include",
+ "kfigure",
+ "maintainers_include",
+ "parser_yaml",
+ "rstFlatTable",
+ "sphinx.ext.autosectionlabel",
+ "sphinx.ext.ifconfig",
+ "translations",
+]
+# Since Sphinx version 3, the C function parser is more pedantic with regards
+# to type checking. Due to that, having macros at c:function cause problems.
+# Those needed to be escaped by using c_id_attributes[] array
+c_id_attributes = [
+ # GCC Compiler types not parsed by Sphinx:
+ "__restrict__",
+
+ # include/linux/compiler_types.h:
+ "__iomem",
+ "__kernel",
+ "noinstr",
+ "notrace",
+ "__percpu",
+ "__rcu",
+ "__user",
+ "__force",
+ "__counted_by_le",
+ "__counted_by_be",
+
+ # include/linux/compiler_attributes.h:
+ "__alias",
+ "__aligned",
+ "__aligned_largest",
+ "__always_inline",
+ "__assume_aligned",
+ "__cold",
+ "__attribute_const__",
+ "__copy",
+ "__pure",
+ "__designated_init",
+ "__visible",
+ "__printf",
+ "__scanf",
+ "__gnu_inline",
+ "__malloc",
+ "__mode",
+ "__no_caller_saved_registers",
+ "__noclone",
+ "__nonstring",
+ "__noreturn",
+ "__packed",
+ "__pure",
+ "__section",
+ "__always_unused",
+ "__maybe_unused",
+ "__used",
+ "__weak",
+ "noinline",
+ "__fix_address",
+ "__counted_by",
+
+ # include/linux/memblock.h:
+ "__init_memblock",
+ "__meminit",
+
+ # include/linux/init.h:
+ "__init",
+ "__ref",
+
+ # include/linux/linkage.h:
+ "asmlinkage",
+
+ # include/linux/btf.h
+ "__bpf_kfunc",
+]
# Ensure that autosectionlabel will produce unique names
autosectionlabel_prefix_document = True
@@ -133,54 +228,51 @@ autosectionlabel_maxdepth = 2
# Load math renderer:
# For html builder, load imgmath only when its dependencies are met.
# mathjax is the default math renderer since Sphinx 1.8.
-have_latex = have_command('latex')
-have_dvipng = have_command('dvipng')
+have_latex = have_command("latex")
+have_dvipng = have_command("dvipng")
load_imgmath = have_latex and have_dvipng
# Respect SPHINX_IMGMATH (for html docs only)
-if 'SPHINX_IMGMATH' in os.environ:
- env_sphinx_imgmath = os.environ['SPHINX_IMGMATH']
- if 'yes' in env_sphinx_imgmath:
+if "SPHINX_IMGMATH" in os.environ:
+ env_sphinx_imgmath = os.environ["SPHINX_IMGMATH"]
+ if "yes" in env_sphinx_imgmath:
load_imgmath = True
- elif 'no' in env_sphinx_imgmath:
+ elif "no" in env_sphinx_imgmath:
load_imgmath = False
else:
sys.stderr.write("Unknown env SPHINX_IMGMATH=%s ignored.\n" % env_sphinx_imgmath)
-# Always load imgmath for Sphinx <1.8 or for epub docs
-load_imgmath = (load_imgmath or (major == 1 and minor < 8)
- or 'epub' in sys.argv)
-
if load_imgmath:
extensions.append("sphinx.ext.imgmath")
- math_renderer = 'imgmath'
+ math_renderer = "imgmath"
else:
- math_renderer = 'mathjax'
+ math_renderer = "mathjax"
# Add any paths that contain templates here, relative to this directory.
-templates_path = ['sphinx/templates']
+templates_path = ["sphinx/templates"]
-# The suffix(es) of source filenames.
-# You can specify multiple suffix as a list of string:
-# source_suffix = ['.rst', '.md']
-source_suffix = '.rst'
+# The suffixes of source filenames that will be automatically parsed
+source_suffix = {
+ ".rst": "restructuredtext",
+ ".yaml": "yaml",
+}
# The encoding of source files.
-#source_encoding = 'utf-8-sig'
+# source_encoding = 'utf-8-sig'
# The master toctree document.
-master_doc = 'index'
+master_doc = "index"
# General information about the project.
-project = 'The Linux Kernel'
-copyright = 'The kernel development community'
-author = 'The kernel development community'
+project = "The Linux Kernel"
+copyright = "The kernel development community" # pylint: disable=W0622
+author = "The kernel development community"
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
-# In a normal build, version and release are are set to KERNELVERSION and
+# In a normal build, version and release are set to KERNELVERSION and
# KERNELRELEASE, respectively, from the Makefile via Sphinx command line
# arguments.
#
@@ -189,86 +281,86 @@ author = 'The kernel development community'
try:
makefile_version = None
makefile_patchlevel = None
- for line in open('../Makefile'):
- key, val = [x.strip() for x in line.split('=', 2)]
- if key == 'VERSION':
- makefile_version = val
- elif key == 'PATCHLEVEL':
- makefile_patchlevel = val
- if makefile_version and makefile_patchlevel:
- break
-except:
+ with open("../Makefile", encoding="utf=8") as fp:
+ for line in fp:
+ key, val = [x.strip() for x in line.split("=", 2)]
+ if key == "VERSION":
+ makefile_version = val
+ elif key == "PATCHLEVEL":
+ makefile_patchlevel = val
+ if makefile_version and makefile_patchlevel:
+ break
+except Exception:
pass
finally:
if makefile_version and makefile_patchlevel:
- version = release = makefile_version + '.' + makefile_patchlevel
+ version = release = makefile_version + "." + makefile_patchlevel
else:
version = release = "unknown version"
-#
-# HACK: there seems to be no easy way for us to get at the version and
-# release information passed in from the makefile...so go pawing through the
-# command-line options and find it for ourselves.
-#
+
def get_cline_version():
- c_version = c_release = ''
+ """
+ HACK: There seems to be no easy way for us to get at the version and
+ release information passed in from the makefile...so go pawing through the
+ command-line options and find it for ourselves.
+ """
+
+ c_version = c_release = ""
for arg in sys.argv:
- if arg.startswith('version='):
+ if arg.startswith("version="):
c_version = arg[8:]
- elif arg.startswith('release='):
+ elif arg.startswith("release="):
c_release = arg[8:]
if c_version:
if c_release:
- return c_version + '-' + c_release
+ return c_version + "-" + c_release
return c_version
- return version # Whatever we came up with before
+ return version # Whatever we came up with before
+
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
-language = 'en'
+language = "en"
# There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used:
-#today = ''
+# today = ''
# Else, today_fmt is used as the format for a strftime call.
-#today_fmt = '%B %d, %Y'
-
-# List of patterns, relative to source directory, that match files and
-# directories to ignore when looking for source files.
-exclude_patterns = ['output']
+# today_fmt = '%B %d, %Y'
# The reST default role (used for this markup: `text`) to use for all
# documents.
-#default_role = None
+# default_role = None
# If true, '()' will be appended to :func: etc. cross-reference text.
-#add_function_parentheses = True
+# add_function_parentheses = True
# If true, the current module name will be prepended to all description
# unit titles (such as .. function::).
-#add_module_names = True
+# add_module_names = True
# If true, sectionauthor and moduleauthor directives will be shown in the
# output. They are ignored by default.
-#show_authors = False
+# show_authors = False
# The name of the Pygments (syntax highlighting) style to use.
-pygments_style = 'sphinx'
+pygments_style = "sphinx"
# A list of ignored prefixes for module index sorting.
-#modindex_common_prefix = []
+# modindex_common_prefix = []
# If true, keep warnings as "system message" paragraphs in the built documents.
-#keep_warnings = False
+# keep_warnings = False
# If true, `todo` and `todoList` produce output, else they produce nothing.
todo_include_todos = False
-primary_domain = 'c'
-highlight_language = 'none'
+primary_domain = "c"
+highlight_language = "none"
# -- Options for HTML output ----------------------------------------------
@@ -276,43 +368,45 @@ highlight_language = 'none'
# a list of builtin themes.
# Default theme
-html_theme = 'alabaster'
+html_theme = "alabaster"
html_css_files = []
if "DOCS_THEME" in os.environ:
html_theme = os.environ["DOCS_THEME"]
-if html_theme == 'sphinx_rtd_theme' or html_theme == 'sphinx_rtd_dark_mode':
+if html_theme in ["sphinx_rtd_theme", "sphinx_rtd_dark_mode"]:
# Read the Docs theme
try:
import sphinx_rtd_theme
+
html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_css_files = [
- 'theme_overrides.css',
+ "theme_overrides.css",
]
# Read the Docs dark mode override theme
- if html_theme == 'sphinx_rtd_dark_mode':
+ if html_theme == "sphinx_rtd_dark_mode":
try:
- import sphinx_rtd_dark_mode
- extensions.append('sphinx_rtd_dark_mode')
+ import sphinx_rtd_dark_mode # pylint: disable=W0611
+
+ extensions.append("sphinx_rtd_dark_mode")
except ImportError:
- html_theme == 'sphinx_rtd_theme'
+ html_theme = "sphinx_rtd_theme"
- if html_theme == 'sphinx_rtd_theme':
- # Add color-specific RTD normal mode
- html_css_files.append('theme_rtd_colors.css')
+ if html_theme == "sphinx_rtd_theme":
+ # Add color-specific RTD normal mode
+ html_css_files.append("theme_rtd_colors.css")
html_theme_options = {
- 'navigation_depth': -1,
+ "navigation_depth": -1,
}
except ImportError:
- html_theme = 'alabaster'
+ html_theme = "alabaster"
if "DOCS_CSS" in os.environ:
css = os.environ["DOCS_CSS"].split(" ")
@@ -320,22 +414,14 @@ if "DOCS_CSS" in os.environ:
for l in css:
html_css_files.append(l)
-if major <= 1 and minor < 8:
- html_context = {
- 'css_files': [],
- }
-
- for l in html_css_files:
- html_context['css_files'].append('_static/' + l)
-
-if html_theme == 'alabaster':
+if html_theme == "alabaster":
html_theme_options = {
- 'description': get_cline_version(),
- 'page_width': '65em',
- 'sidebar_width': '15em',
- 'fixed_sidebar': 'true',
- 'font_size': 'inherit',
- 'font_family': 'serif',
+ "description": get_cline_version(),
+ "page_width": "65em",
+ "sidebar_width": "15em",
+ "fixed_sidebar": "true",
+ "font_size": "inherit",
+ "font_family": "serif",
}
sys.stderr.write("Using %s theme\n" % html_theme)
@@ -343,139 +429,100 @@ sys.stderr.write("Using %s theme\n" % html_theme)
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['sphinx-static']
+html_static_path = ["sphinx-static"]
# If true, Docutils "smart quotes" will be used to convert quotes and dashes
-# to typographically correct entities. This will convert "--" to "—",
-# which is not always what we want, so disable it.
-smartquotes = False
+# to typographically correct entities. However, conversion of "--" to "—"
+# is not always what we want, so enable only quotes.
+smartquotes_action = "q"
# Custom sidebar templates, maps document names to template names.
# Note that the RTD theme ignores this
-html_sidebars = { '**': ['searchbox.html', 'kernel-toc.html', 'sourcelink.html']}
+html_sidebars = {"**": ["searchbox.html",
+ "kernel-toc.html",
+ "sourcelink.html"]}
# about.html is available for alabaster theme. Add it at the front.
-if html_theme == 'alabaster':
- html_sidebars['**'].insert(0, 'about.html')
+if html_theme == "alabaster":
+ html_sidebars["**"].insert(0, "about.html")
# The name of an image file (relative to this directory) to place at the top
# of the sidebar.
-html_logo = 'images/logo.svg'
+html_logo = "images/logo.svg"
# Output file base name for HTML help builder.
-htmlhelp_basename = 'TheLinuxKerneldoc'
+htmlhelp_basename = "TheLinuxKerneldoc"
# -- Options for LaTeX output ---------------------------------------------
latex_elements = {
# The paper size ('letterpaper' or 'a4paper').
- 'papersize': 'a4paper',
-
+ "papersize": "a4paper",
+ "passoptionstopackages": dedent(r"""
+ \PassOptionsToPackage{svgnames}{xcolor}
+ """),
# The font size ('10pt', '11pt' or '12pt').
- 'pointsize': '11pt',
-
+ "pointsize": "11pt",
+ # Needed to generate a .ind file
+ "printindex": r"\footnotesize\raggedright\printindex",
# Latex figure (float) alignment
- #'figure_align': 'htbp',
-
+ # 'figure_align': 'htbp',
# Don't mangle with UTF-8 chars
- 'inputenc': '',
- 'utf8extra': '',
-
+ "fontenc": "",
+ "inputenc": "",
+ "utf8extra": "",
# Set document margins
- 'sphinxsetup': '''
+ "sphinxsetup": dedent(r"""
hmargin=0.5in, vmargin=1in,
parsedliteralwraps=true,
verbatimhintsturnover=false,
- ''',
-
+ """),
+ #
+ # Some of our authors are fond of deep nesting; tell latex to
+ # cope.
+ #
+ "maxlistdepth": "10",
# For CJK One-half spacing, need to be in front of hyperref
- 'extrapackages': r'\usepackage{setspace}',
-
- # Additional stuff for the LaTeX preamble.
- 'preamble': '''
- % Use some font with UTF-8 support with XeLaTeX
- \\usepackage{fontspec}
- \\setsansfont{DejaVu Sans}
- \\setromanfont{DejaVu Serif}
- \\setmonofont{DejaVu Sans Mono}
- ''',
-}
-
-# Fix reference escape troubles with Sphinx 1.4.x
-if major == 1:
- latex_elements['preamble'] += '\\renewcommand*{\\DUrole}[2]{ #2 }\n'
-
-
-# Load kerneldoc specific LaTeX settings
-latex_elements['preamble'] += '''
+ "extrapackages": r"\usepackage{setspace}",
+ "fontpkg": dedent(r"""
+ \usepackage{fontspec}
+ \setmainfont{DejaVu Serif}
+ \setsansfont{DejaVu Sans}
+ \setmonofont{DejaVu Sans Mono}
+ \newfontfamily\headingfont{DejaVu Serif}
+ """),
+ "preamble": dedent(r"""
% Load kerneldoc specific LaTeX settings
- \\input{kerneldoc-preamble.sty}
-'''
-
-# With Sphinx 1.6, it is possible to change the Bg color directly
-# by using:
-# \definecolor{sphinxnoteBgColor}{RGB}{204,255,255}
-# \definecolor{sphinxwarningBgColor}{RGB}{255,204,204}
-# \definecolor{sphinxattentionBgColor}{RGB}{255,255,204}
-# \definecolor{sphinximportantBgColor}{RGB}{192,255,204}
-#
-# However, it require to use sphinx heavy box with:
-#
-# \renewenvironment{sphinxlightbox} {%
-# \\begin{sphinxheavybox}
-# }
-# \\end{sphinxheavybox}
-# }
-#
-# Unfortunately, the implementation is buggy: if a note is inside a
-# table, it isn't displayed well. So, for now, let's use boring
-# black and white notes.
-
-# Grouping the document tree into LaTeX files. List of tuples
-# (source start file, target name, title,
-# author, documentclass [howto, manual, or own class]).
-# Sorted in alphabetical order
-latex_documents = [
-]
+ \input{kerneldoc-preamble.sty}
+ """)
+}
-# Add all other index files from Documentation/ subdirectories
-for fn in os.listdir('.'):
- doc = os.path.join(fn, "index")
- if os.path.exists(doc + ".rst"):
- has = False
- for l in latex_documents:
- if l[0] == doc:
- has = True
- break
- if not has:
- latex_documents.append((doc, fn + '.tex',
- 'Linux %s Documentation' % fn.capitalize(),
- 'The kernel development community',
- 'manual'))
+# This will be filled up by config-inited event
+latex_documents = []
# The name of an image file (relative to this directory) to place at the top of
# the title page.
-#latex_logo = None
+# latex_logo = None
# For "manual" documents, if this is true, then toplevel headings are parts,
# not chapters.
-#latex_use_parts = False
+# latex_use_parts = False
# If true, show page references after internal links.
-#latex_show_pagerefs = False
+# latex_show_pagerefs = False
# If true, show URL addresses after external links.
-#latex_show_urls = False
+# latex_show_urls = False
# Documents to append as an appendix to all manuals.
-#latex_appendices = []
+# latex_appendices = []
# If false, no module index is generated.
-#latex_domain_indices = True
+# latex_domain_indices = True
# Additional LaTeX stuff to be copied to build directory
latex_additional_files = [
- 'sphinx/kerneldoc-preamble.sty',
+ "sphinx/kerneldoc-preamble.sty",
]
@@ -484,12 +531,11 @@ latex_additional_files = [
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
- (master_doc, 'thelinuxkernel', 'The Linux Kernel Documentation',
- [author], 1)
+ (master_doc, "thelinuxkernel", "The Linux Kernel Documentation", [author], 1)
]
# If true, show URL addresses after external links.
-#man_show_urls = False
+# man_show_urls = False
# -- Options for Texinfo output -------------------------------------------
@@ -497,11 +543,15 @@ man_pages = [
# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
# dir menu entry, description, category)
-texinfo_documents = [
- (master_doc, 'TheLinuxKernel', 'The Linux Kernel Documentation',
- author, 'TheLinuxKernel', 'One line description of project.',
- 'Miscellaneous'),
-]
+texinfo_documents = [(
+ master_doc,
+ "TheLinuxKernel",
+ "The Linux Kernel Documentation",
+ author,
+ "TheLinuxKernel",
+ "One line description of project.",
+ "Miscellaneous",
+ ),]
# -- Options for Epub output ----------------------------------------------
@@ -512,9 +562,9 @@ epub_publisher = author
epub_copyright = copyright
# A list of files that should not be packed into the epub file.
-epub_exclude_files = ['search.html']
+epub_exclude_files = ["search.html"]
-#=======
+# =======
# rst2pdf
#
# Grouping the document tree into PDF files. List of tuples
@@ -526,17 +576,16 @@ epub_exclude_files = ['search.html']
# multiple PDF files here actually tries to get the cross-referencing right
# *between* PDF files.
pdf_documents = [
- ('kernel-documentation', u'Kernel', u'Kernel', u'J. Random Bozo'),
+ ("kernel-documentation", "Kernel", "Kernel", "J. Random Bozo"),
]
# kernel-doc extension configuration for running Sphinx directly (e.g. by Read
# the Docs). In a normal build, these are supplied from the Makefile via command
# line arguments.
-kerneldoc_bin = '../scripts/kernel-doc'
-kerneldoc_srctree = '..'
-
-# ------------------------------------------------------------------------------
-# Since loadConfig overwrites settings from the global namespace, it has to be
-# the last statement in the conf.py file
-# ------------------------------------------------------------------------------
-loadConfig(globals())
+kerneldoc_bin = "../scripts/kernel-doc.py"
+kerneldoc_srctree = ".."
+
+def setup(app):
+ """Patterns need to be updated at init time on older Sphinx versions"""
+
+ app.connect('config-inited', config_init)
diff --git a/Documentation/core-api/assoc_array.rst b/Documentation/core-api/assoc_array.rst
index 792bbf9939e1..19d89f92bf8d 100644
--- a/Documentation/core-api/assoc_array.rst
+++ b/Documentation/core-api/assoc_array.rst
@@ -92,18 +92,18 @@ There are two functions for dealing with the script:
void assoc_array_apply_edit(struct assoc_array_edit *edit);
-This will perform the edit functions, interpolating various write barriers
-to permit accesses under the RCU read lock to continue. The edit script
-will then be passed to ``call_rcu()`` to free it and any dead stuff it points
-to.
+ This will perform the edit functions, interpolating various write barriers
+ to permit accesses under the RCU read lock to continue. The edit script
+ will then be passed to ``call_rcu()`` to free it and any dead stuff it
+ points to.
2. Cancel an edit script::
void assoc_array_cancel_edit(struct assoc_array_edit *edit);
-This frees the edit script and all preallocated memory immediately. If
-this was for insertion, the new object is _not_ released by this function,
-but must rather be released by the caller.
+ This frees the edit script and all preallocated memory immediately. If
+ this was for insertion, the new object is *not* released by this function,
+ but must rather be released by the caller.
These functions are guaranteed not to fail.
@@ -123,43 +123,43 @@ This points to a number of methods, all of which need to be provided:
unsigned long (*get_key_chunk)(const void *index_key, int level);
-This should return a chunk of caller-supplied index key starting at the
-*bit* position given by the level argument. The level argument will be a
-multiple of ``ASSOC_ARRAY_KEY_CHUNK_SIZE`` and the function should return
-``ASSOC_ARRAY_KEY_CHUNK_SIZE bits``. No error is possible.
+ This should return a chunk of caller-supplied index key starting at the
+ *bit* position given by the level argument. The level argument will be a
+ multiple of ``ASSOC_ARRAY_KEY_CHUNK_SIZE`` and the function should return
+ ``ASSOC_ARRAY_KEY_CHUNK_SIZE bits``. No error is possible.
2. Get a chunk of an object's index key::
unsigned long (*get_object_key_chunk)(const void *object, int level);
-As the previous function, but gets its data from an object in the array
-rather than from a caller-supplied index key.
+ As the previous function, but gets its data from an object in the array
+ rather than from a caller-supplied index key.
3. See if this is the object we're looking for::
bool (*compare_object)(const void *object, const void *index_key);
-Compare the object against an index key and return ``true`` if it matches and
-``false`` if it doesn't.
+ Compare the object against an index key and return ``true`` if it matches
+ and ``false`` if it doesn't.
4. Diff the index keys of two objects::
int (*diff_objects)(const void *object, const void *index_key);
-Return the bit position at which the index key of the specified object
-differs from the given index key or -1 if they are the same.
+ Return the bit position at which the index key of the specified object
+ differs from the given index key or -1 if they are the same.
5. Free an object::
void (*free_object)(void *object);
-Free the specified object. Note that this may be called an RCU grace period
-after ``assoc_array_apply_edit()`` was called, so ``synchronize_rcu()`` may be
-necessary on module unloading.
+ Free the specified object. Note that this may be called an RCU grace period
+ after ``assoc_array_apply_edit()`` was called, so ``synchronize_rcu()`` may
+ be necessary on module unloading.
Manipulation Functions
@@ -171,7 +171,7 @@ There are a number of functions for manipulating an associative array:
void assoc_array_init(struct assoc_array *array);
-This initialises the base structure for an associative array. It can't fail.
+ This initialises the base structure for an associative array. It can't fail.
2. Insert/replace an object in an associative array::
@@ -182,21 +182,21 @@ This initialises the base structure for an associative array. It can't fail.
const void *index_key,
void *object);
-This inserts the given object into the array. Note that the least
-significant bit of the pointer must be zero as it's used to type-mark
-pointers internally.
+ This inserts the given object into the array. Note that the least
+ significant bit of the pointer must be zero as it's used to type-mark
+ pointers internally.
-If an object already exists for that key then it will be replaced with the
-new object and the old one will be freed automatically.
+ If an object already exists for that key then it will be replaced with the
+ new object and the old one will be freed automatically.
-The ``index_key`` argument should hold index key information and is
-passed to the methods in the ops table when they are called.
+ The ``index_key`` argument should hold index key information and is
+ passed to the methods in the ops table when they are called.
-This function makes no alteration to the array itself, but rather returns
-an edit script that must be applied. ``-ENOMEM`` is returned in the case of
-an out-of-memory error.
+ This function makes no alteration to the array itself, but rather returns
+ an edit script that must be applied. ``-ENOMEM`` is returned in the case of
+ an out-of-memory error.
-The caller should lock exclusively against other modifiers of the array.
+ The caller should lock exclusively against other modifiers of the array.
3. Delete an object from an associative array::
@@ -206,15 +206,15 @@ The caller should lock exclusively against other modifiers of the array.
const struct assoc_array_ops *ops,
const void *index_key);
-This deletes an object that matches the specified data from the array.
+ This deletes an object that matches the specified data from the array.
-The ``index_key`` argument should hold index key information and is
-passed to the methods in the ops table when they are called.
+ The ``index_key`` argument should hold index key information and is
+ passed to the methods in the ops table when they are called.
-This function makes no alteration to the array itself, but rather returns
-an edit script that must be applied. ``-ENOMEM`` is returned in the case of
-an out-of-memory error. ``NULL`` will be returned if the specified object is
-not found within the array.
+ This function makes no alteration to the array itself, but rather returns
+ an edit script that must be applied. ``-ENOMEM`` is returned in the case of
+ an out-of-memory error. ``NULL`` will be returned if the specified object
+ is not found within the array.
The caller should lock exclusively against other modifiers of the array.
@@ -225,14 +225,14 @@ The caller should lock exclusively against other modifiers of the array.
assoc_array_clear(struct assoc_array *array,
const struct assoc_array_ops *ops);
-This deletes all the objects from an associative array and leaves it
-completely empty.
+ This deletes all the objects from an associative array and leaves it
+ completely empty.
-This function makes no alteration to the array itself, but rather returns
-an edit script that must be applied. ``-ENOMEM`` is returned in the case of
-an out-of-memory error.
+ This function makes no alteration to the array itself, but rather returns
+ an edit script that must be applied. ``-ENOMEM`` is returned in the case of
+ an out-of-memory error.
-The caller should lock exclusively against other modifiers of the array.
+ The caller should lock exclusively against other modifiers of the array.
5. Destroy an associative array, deleting all objects::
@@ -240,14 +240,14 @@ The caller should lock exclusively against other modifiers of the array.
void assoc_array_destroy(struct assoc_array *array,
const struct assoc_array_ops *ops);
-This destroys the contents of the associative array and leaves it
-completely empty. It is not permitted for another thread to be traversing
-the array under the RCU read lock at the same time as this function is
-destroying it as no RCU deferral is performed on memory release -
-something that would require memory to be allocated.
+ This destroys the contents of the associative array and leaves it
+ completely empty. It is not permitted for another thread to be traversing
+ the array under the RCU read lock at the same time as this function is
+ destroying it as no RCU deferral is performed on memory release -
+ something that would require memory to be allocated.
-The caller should lock exclusively against other modifiers and accessors
-of the array.
+ The caller should lock exclusively against other modifiers and accessors
+ of the array.
6. Garbage collect an associative array::
@@ -257,24 +257,24 @@ of the array.
bool (*iterator)(void *object, void *iterator_data),
void *iterator_data);
-This iterates over the objects in an associative array and passes each one to
-``iterator()``. If ``iterator()`` returns ``true``, the object is kept. If it
-returns ``false``, the object will be freed. If the ``iterator()`` function
-returns ``true``, it must perform any appropriate refcount incrementing on the
-object before returning.
+ This iterates over the objects in an associative array and passes each one
+ to ``iterator()``. If ``iterator()`` returns ``true``, the object is kept.
+ If it returns ``false``, the object will be freed. If the ``iterator()``
+ function returns ``true``, it must perform any appropriate refcount
+ incrementing on the object before returning.
-The internal tree will be packed down if possible as part of the iteration
-to reduce the number of nodes in it.
+ The internal tree will be packed down if possible as part of the iteration
+ to reduce the number of nodes in it.
-The ``iterator_data`` is passed directly to ``iterator()`` and is otherwise
-ignored by the function.
+ The ``iterator_data`` is passed directly to ``iterator()`` and is otherwise
+ ignored by the function.
-The function will return ``0`` if successful and ``-ENOMEM`` if there wasn't
-enough memory.
+ The function will return ``0`` if successful and ``-ENOMEM`` if there wasn't
+ enough memory.
-It is possible for other threads to iterate over or search the array under
-the RCU read lock while this function is in progress. The caller should
-lock exclusively against other modifiers of the array.
+ It is possible for other threads to iterate over or search the array under
+ the RCU read lock while this function is in progress. The caller should
+ lock exclusively against other modifiers of the array.
Access Functions
@@ -289,19 +289,19 @@ There are two functions for accessing an associative array:
void *iterator_data),
void *iterator_data);
-This passes each object in the array to the iterator callback function.
-``iterator_data`` is private data for that function.
+ This passes each object in the array to the iterator callback function.
+ ``iterator_data`` is private data for that function.
-This may be used on an array at the same time as the array is being
-modified, provided the RCU read lock is held. Under such circumstances,
-it is possible for the iteration function to see some objects twice. If
-this is a problem, then modification should be locked against. The
-iteration algorithm should not, however, miss any objects.
+ This may be used on an array at the same time as the array is being
+ modified, provided the RCU read lock is held. Under such circumstances,
+ it is possible for the iteration function to see some objects twice. If
+ this is a problem, then modification should be locked against. The
+ iteration algorithm should not, however, miss any objects.
-The function will return ``0`` if no objects were in the array or else it will
-return the result of the last iterator function called. Iteration stops
-immediately if any call to the iteration function results in a non-zero
-return.
+ The function will return ``0`` if no objects were in the array or else it
+ will return the result of the last iterator function called. Iteration
+ stops immediately if any call to the iteration function results in a
+ non-zero return.
2. Find an object in an associative array::
@@ -310,14 +310,14 @@ return.
const struct assoc_array_ops *ops,
const void *index_key);
-This walks through the array's internal tree directly to the object
-specified by the index key..
+ This walks through the array's internal tree directly to the object
+ specified by the index key.
-This may be used on an array at the same time as the array is being
-modified, provided the RCU read lock is held.
+ This may be used on an array at the same time as the array is being
+ modified, provided the RCU read lock is held.
-The function will return the object if found (and set ``*_type`` to the object
-type) or will return ``NULL`` if the object was not found.
+ The function will return the object if found (and set ``*_type`` to the
+ object type) or will return ``NULL`` if the object was not found.
Index Key Form
@@ -399,10 +399,11 @@ fixed levels. For example::
In the above example, there are 7 nodes (A-G), each with 16 slots (0-f).
Assuming no other meta data nodes in the tree, the key space is divided
-thusly::
+thusly:
+ =========== ====
KEY PREFIX NODE
- ========== ====
+ =========== ====
137* D
138* E
13[0-69-f]* C
@@ -410,10 +411,12 @@ thusly::
e6* G
e[0-57-f]* F
[02-df]* A
+ =========== ====
So, for instance, keys with the following example index keys will be found in
-the appropriate nodes::
+the appropriate nodes:
+ =============== ======= ====
INDEX KEY PREFIX NODE
=============== ======= ====
13694892892489 13 C
@@ -422,12 +425,13 @@ the appropriate nodes::
138bbb89003093 138 E
1394879524789 12 C
1458952489 1 B
- 9431809de993ba - A
- b4542910809cd - A
+ 9431809de993ba \- A
+ b4542910809cd \- A
e5284310def98 e F
e68428974237 e6 G
e7fffcbd443 e F
- f3842239082 - A
+ f3842239082 \- A
+ =============== ======= ====
To save memory, if a node can hold all the leaves in its portion of keyspace,
then the node will have all those leaves in it and will not have any metadata
@@ -441,8 +445,9 @@ metadata pointer. If the metadata pointer is there, any leaf whose key matches
the metadata key prefix must be in the subtree that the metadata pointer points
to.
-In the above example list of index keys, node A will contain::
+In the above example list of index keys, node A will contain:
+ ==== =============== ==================
SLOT CONTENT INDEX KEY (PREFIX)
==== =============== ==================
1 PTR TO NODE B 1*
@@ -450,11 +455,16 @@ In the above example list of index keys, node A will contain::
any LEAF b4542910809cd
e PTR TO NODE F e*
any LEAF f3842239082
+ ==== =============== ==================
-and node B::
+and node B:
- 3 PTR TO NODE C 13*
- any LEAF 1458952489
+ ==== =============== ==================
+ SLOT CONTENT INDEX KEY (PREFIX)
+ ==== =============== ==================
+ 3 PTR TO NODE C 13*
+ any LEAF 1458952489
+ ==== =============== ==================
Shortcuts
diff --git a/Documentation/core-api/cgroup.rst b/Documentation/core-api/cgroup.rst
new file mode 100644
index 000000000000..734ea21e1e17
--- /dev/null
+++ b/Documentation/core-api/cgroup.rst
@@ -0,0 +1,9 @@
+==================
+Cgroup Kernel APIs
+==================
+
+Device Memory Cgroup API (dmemcg)
+=================================
+.. kernel-doc:: kernel/cgroup/dmem.c
+ :export:
+
diff --git a/Documentation/core-api/cleanup.rst b/Documentation/core-api/cleanup.rst
new file mode 100644
index 000000000000..527eb2f8ec6e
--- /dev/null
+++ b/Documentation/core-api/cleanup.rst
@@ -0,0 +1,8 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===========================
+Scope-based Cleanup Helpers
+===========================
+
+.. kernel-doc:: include/linux/cleanup.h
+ :doc: scope-based cleanup helpers
diff --git a/Documentation/core-api/cpu_hotplug.rst b/Documentation/core-api/cpu_hotplug.rst
index dcb0e379e5e8..e1b0eeabbb5e 100644
--- a/Documentation/core-api/cpu_hotplug.rst
+++ b/Documentation/core-api/cpu_hotplug.rst
@@ -616,7 +616,7 @@ ONLINE section for notifications on online and offline operation::
....
cpuhp_remove_instance(state, &inst2->node);
....
- remove_multi_state(state);
+ cpuhp_remove_multi_state(state);
Testing of hotplug states
@@ -737,8 +737,9 @@ can process the event further.
When changes to the CPUs in the system occur, the sysfs file
/sys/devices/system/cpu/crash_hotplug contains '1' if the kernel
-updates the kdump capture kernel list of CPUs itself (via elfcorehdr),
-or '0' if userspace must update the kdump capture kernel list of CPUs.
+updates the kdump capture kernel list of CPUs itself (via elfcorehdr and
+other relevant kexec segment), or '0' if userspace must update the kdump
+capture kernel list of CPUs.
The availability depends on the CONFIG_HOTPLUG_CPU kernel configuration
option.
@@ -750,8 +751,9 @@ file can be used in a udev rule as follows:
SUBSYSTEM=="cpu", ATTRS{crash_hotplug}=="1", GOTO="kdump_reload_end"
For a CPU hot un/plug event, if the architecture supports kernel updates
-of the elfcorehdr (which contains the list of CPUs), then the rule skips
-the unload-then-reload of the kdump capture kernel.
+of the elfcorehdr (which contains the list of CPUs) and other relevant
+kexec segments, then the rule skips the unload-then-reload of the kdump
+capture kernel.
Kernel Inline Documentations Reference
======================================
diff --git a/Documentation/core-api/dma-api-howto.rst b/Documentation/core-api/dma-api-howto.rst
index e8a55f9d61db..96fce2a9aa90 100644
--- a/Documentation/core-api/dma-api-howto.rst
+++ b/Documentation/core-api/dma-api-howto.rst
@@ -155,7 +155,7 @@ a device with limitations, it needs to be decreased.
Special note about PCI: PCI-X specification requires PCI-X devices to support
64-bit addressing (DAC) for all transactions. And at least one platform (SGI
-SN2) requires 64-bit consistent allocations to operate correctly when the IO
+SN2) requires 64-bit coherent allocations to operate correctly when the IO
bus is in PCI-X mode.
For correct operation, you must set the DMA mask to inform the kernel about
@@ -174,7 +174,7 @@ used instead:
int dma_set_mask(struct device *dev, u64 mask);
- The setup for consistent allocations is performed via a call
+ The setup for coherent allocations is performed via a call
to dma_set_coherent_mask()::
int dma_set_coherent_mask(struct device *dev, u64 mask);
@@ -203,13 +203,33 @@ setting the DMA mask fails. In this manner, if a user of your driver reports
that performance is bad or that the device is not even detected, you can ask
them for the kernel messages to find out exactly why.
-The standard 64-bit addressing device would do something like this::
+The 24-bit addressing device would do something like this::
- if (dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64))) {
+ if (dma_set_mask_and_coherent(dev, DMA_BIT_MASK(24))) {
dev_warn(dev, "mydev: No suitable DMA available\n");
goto ignore_this_device;
}
+The standard 64-bit addressing device would do something like this::
+
+ dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64))
+
+dma_set_mask_and_coherent() never return fail when DMA_BIT_MASK(64). Typical
+error code like::
+
+ /* Wrong code */
+ if (dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64)))
+ dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32))
+
+dma_set_mask_and_coherent() will never return failure when bigger than 32.
+So typical code like::
+
+ /* Recommended code */
+ if (support_64bit)
+ dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
+ else
+ dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
+
If the device only supports 32-bit addressing for descriptors in the
coherent allocations, but supports full 64-bits for streaming mappings
it would look like this::
@@ -221,7 +241,7 @@ it would look like this::
The coherent mask will always be able to set the same or a smaller mask as
the streaming mask. However for the rare case that a device driver only
-uses consistent allocations, one would have to check the return value from
+uses coherent allocations, one would have to check the return value from
dma_set_coherent_mask().
Finally, if your device can only drive the low 24-bits of
@@ -278,20 +298,20 @@ Types of DMA mappings
There are two types of DMA mappings:
-- Consistent DMA mappings which are usually mapped at driver
+- Coherent DMA mappings which are usually mapped at driver
initialization, unmapped at the end and for which the hardware should
guarantee that the device and the CPU can access the data
in parallel and will see updates made by each other without any
explicit software flushing.
- Think of "consistent" as "synchronous" or "coherent".
+ Think of "coherent" as "synchronous".
- The current default is to return consistent memory in the low 32
+ The current default is to return coherent memory in the low 32
bits of the DMA space. However, for future compatibility you should
- set the consistent mask even if this default is fine for your
+ set the coherent mask even if this default is fine for your
driver.
- Good examples of what to use consistent mappings for are:
+ Good examples of what to use coherent mappings for are:
- Network card DMA ring descriptors.
- SCSI adapter mailbox command data structures.
@@ -300,13 +320,13 @@ There are two types of DMA mappings:
The invariant these examples all require is that any CPU store
to memory is immediately visible to the device, and vice
- versa. Consistent mappings guarantee this.
+ versa. Coherent mappings guarantee this.
.. important::
- Consistent DMA memory does not preclude the usage of
+ Coherent DMA memory does not preclude the usage of
proper memory barriers. The CPU may reorder stores to
- consistent memory just as it may normal memory. Example:
+ coherent memory just as it may normal memory. Example:
if it is important for the device to see the first word
of a descriptor updated before the second, you must do
something like::
@@ -345,10 +365,10 @@ Also, systems with caches that aren't DMA-coherent will work better
when the underlying buffers don't share cache lines with other data.
-Using Consistent DMA mappings
-=============================
+Using Coherent DMA mappings
+===========================
-To allocate and map large (PAGE_SIZE or so) consistent DMA regions,
+To allocate and map large (PAGE_SIZE or so) coherent DMA regions,
you should do::
dma_addr_t dma_handle;
@@ -365,10 +385,10 @@ __get_free_pages() (but takes size instead of a page order). If your
driver needs regions sized smaller than a page, you may prefer using
the dma_pool interface, described below.
-The consistent DMA mapping interfaces, will by default return a DMA address
+The coherent DMA mapping interfaces, will by default return a DMA address
which is 32-bit addressable. Even if the device indicates (via the DMA mask)
-that it may address the upper 32-bits, consistent allocation will only
-return > 32-bit addresses for DMA if the consistent DMA mask has been
+that it may address the upper 32-bits, coherent allocation will only
+return > 32-bit addresses for DMA if the coherent DMA mask has been
explicitly changed via dma_set_coherent_mask(). This is true of the
dma_pool interface as well.
@@ -477,7 +497,7 @@ program address space. Such platforms can and do report errors in the
kernel logs when the DMA controller hardware detects violation of the
permission setting.
-Only streaming mappings specify a direction, consistent mappings
+Only streaming mappings specify a direction, coherent mappings
implicitly have a direction attribute setting of
DMA_BIDIRECTIONAL.
diff --git a/Documentation/core-api/dma-api.rst b/Documentation/core-api/dma-api.rst
index 8e3cce3d0a23..ca75b3541679 100644
--- a/Documentation/core-api/dma-api.rst
+++ b/Documentation/core-api/dma-api.rst
@@ -8,15 +8,15 @@ This document describes the DMA API. For a more gentle introduction
of the API (and actual examples), see Documentation/core-api/dma-api-howto.rst.
This API is split into two pieces. Part I describes the basic API.
-Part II describes extensions for supporting non-consistent memory
+Part II describes extensions for supporting non-coherent memory
machines. Unless you know that your driver absolutely has to support
-non-consistent platforms (this is usually only legacy platforms) you
+non-coherent platforms (this is usually only legacy platforms) you
should only use the API described in part I.
-Part I - dma_API
+Part I - DMA API
----------------
-To get the dma_API, you must #include <linux/dma-mapping.h>. This
+To get the DMA API, you must #include <linux/dma-mapping.h>. This
provides dma_addr_t and the interfaces described below.
A dma_addr_t can hold any valid DMA address for the platform. It can be
@@ -33,13 +33,13 @@ Part Ia - Using large DMA-coherent buffers
dma_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t flag)
-Consistent memory is memory for which a write by either the device or
+Coherent memory is memory for which a write by either the device or
the processor can immediately be read by the processor or device
without having to worry about caching effects. (You may however need
to make sure to flush the processor's write buffers before telling
devices to read that memory.)
-This routine allocates a region of <size> bytes of consistent memory.
+This routine allocates a region of <size> bytes of coherent memory.
It returns a pointer to the allocated region (in the processor's virtual
address space) or NULL if the allocation failed.
@@ -48,15 +48,14 @@ It also returns a <dma_handle> which may be cast to an unsigned integer the
same width as the bus and given to the device as the DMA address base of
the region.
-Note: consistent memory can be expensive on some platforms, and the
+Note: coherent memory can be expensive on some platforms, and the
minimum allocation length may be as big as a page, so you should
-consolidate your requests for consistent memory as much as possible.
+consolidate your requests for coherent memory as much as possible.
The simplest way to do that is to use the dma_pool calls (see below).
-The flag parameter (dma_alloc_coherent() only) allows the caller to
-specify the ``GFP_`` flags (see kmalloc()) for the allocation (the
-implementation may choose to ignore flags that affect the location of
-the returned memory, like GFP_DMA).
+The flag parameter allows the caller to specify the ``GFP_`` flags (see
+kmalloc()) for the allocation (the implementation may ignore flags that affect
+the location of the returned memory, like GFP_DMA).
::
@@ -64,19 +63,18 @@ the returned memory, like GFP_DMA).
dma_free_coherent(struct device *dev, size_t size, void *cpu_addr,
dma_addr_t dma_handle)
-Free a region of consistent memory you previously allocated. dev,
-size and dma_handle must all be the same as those passed into
-dma_alloc_coherent(). cpu_addr must be the virtual address returned by
-the dma_alloc_coherent().
+Free a previously allocated region of coherent memory. dev, size and dma_handle
+must all be the same as those passed into dma_alloc_coherent(). cpu_addr must
+be the virtual address returned by dma_alloc_coherent().
-Note that unlike their sibling allocation calls, these routines
-may only be called with IRQs enabled.
+Note that unlike the sibling allocation call, this routine may only be called
+with IRQs enabled.
Part Ib - Using small DMA-coherent buffers
------------------------------------------
-To get this part of the dma_API, you must #include <linux/dmapool.h>
+To get this part of the DMA API, you must #include <linux/dmapool.h>
Many drivers need lots of small DMA-coherent memory regions for DMA
descriptors or I/O buffers. Rather than allocating in units of a page
@@ -85,78 +83,29 @@ much like a struct kmem_cache, except that they use the DMA-coherent allocator,
not __get_free_pages(). Also, they understand common hardware constraints
for alignment, like queue heads needing to be aligned on N-byte boundaries.
+.. kernel-doc:: mm/dmapool.c
+ :export:
-::
-
- struct dma_pool *
- dma_pool_create(const char *name, struct device *dev,
- size_t size, size_t align, size_t alloc);
-
-dma_pool_create() initializes a pool of DMA-coherent buffers
-for use with a given device. It must be called in a context which
-can sleep.
-
-The "name" is for diagnostics (like a struct kmem_cache name); dev and size
-are like what you'd pass to dma_alloc_coherent(). The device's hardware
-alignment requirement for this type of data is "align" (which is expressed
-in bytes, and must be a power of two). If your device has no boundary
-crossing restrictions, pass 0 for alloc; passing 4096 says memory allocated
-from this pool must not cross 4KByte boundaries.
-
-::
-
- void *
- dma_pool_zalloc(struct dma_pool *pool, gfp_t mem_flags,
- dma_addr_t *handle)
-
-Wraps dma_pool_alloc() and also zeroes the returned memory if the
-allocation attempt succeeded.
-
-
-::
-
- void *
- dma_pool_alloc(struct dma_pool *pool, gfp_t gfp_flags,
- dma_addr_t *dma_handle);
-
-This allocates memory from the pool; the returned memory will meet the
-size and alignment requirements specified at creation time. Pass
-GFP_ATOMIC to prevent blocking, or if it's permitted (not
-in_interrupt, not holding SMP locks), pass GFP_KERNEL to allow
-blocking. Like dma_alloc_coherent(), this returns two values: an
-address usable by the CPU, and the DMA address usable by the pool's
-device.
-
-::
-
- void
- dma_pool_free(struct dma_pool *pool, void *vaddr,
- dma_addr_t addr);
-
-This puts memory back into the pool. The pool is what was passed to
-dma_pool_alloc(); the CPU (vaddr) and DMA addresses are what
-were returned when that routine allocated the memory being freed.
-
-::
-
- void
- dma_pool_destroy(struct dma_pool *pool);
-
-dma_pool_destroy() frees the resources of the pool. It must be
-called in a context which can sleep. Make sure you've freed all allocated
-memory back to the pool before you destroy it.
+.. kernel-doc:: include/linux/dmapool.h
Part Ic - DMA addressing limitations
------------------------------------
+DMA mask is a bit mask of the addressable region for the device. In other words,
+if applying the DMA mask (a bitwise AND operation) to the DMA address of a
+memory region does not clear any bits in the address, then the device can
+perform DMA to that memory region.
+
+All the below functions which set a DMA mask may fail if the requested mask
+cannot be used with the device, or if the device is not capable of doing DMA.
+
::
int
dma_set_mask_and_coherent(struct device *dev, u64 mask)
-Checks to see if the mask is possible and updates the device
-streaming and coherent DMA mask parameters if it is.
+Updates both streaming and coherent DMA masks.
Returns: 0 if successful and a negative error if not.
@@ -165,8 +114,7 @@ Returns: 0 if successful and a negative error if not.
int
dma_set_mask(struct device *dev, u64 mask)
-Checks to see if the mask is possible and updates the device
-parameters if it is.
+Updates only the streaming DMA mask.
Returns: 0 if successful and a negative error if not.
@@ -175,8 +123,7 @@ Returns: 0 if successful and a negative error if not.
int
dma_set_coherent_mask(struct device *dev, u64 mask)
-Checks to see if the mask is possible and updates the device
-parameters if it is.
+Updates only the coherent DMA mask.
Returns: 0 if successful and a negative error if not.
@@ -231,12 +178,32 @@ transfer memory ownership. Returns %false if those calls can be skipped.
unsigned long
dma_get_merge_boundary(struct device *dev);
-Returns the DMA merge boundary. If the device cannot merge any the DMA address
+Returns the DMA merge boundary. If the device cannot merge any DMA address
segments, the function returns 0.
Part Id - Streaming DMA mappings
--------------------------------
+Streaming DMA allows to map an existing buffer for DMA transfers and then
+unmap it when finished. Map functions are not guaranteed to succeed, so the
+return value must be checked.
+
+.. note::
+
+ In particular, mapping may fail for memory not addressable by the
+ device, e.g. if it is not within the DMA mask of the device and/or a
+ connecting bus bridge. Streaming DMA functions try to overcome such
+ addressing constraints, either by using an IOMMU (a device which maps
+ I/O DMA addresses to physical memory addresses), or by copying the
+ data to/from a bounce buffer if the kernel is configured with a
+ :doc:`SWIOTLB <swiotlb>`. However, these methods are not always
+ available, and even if they are, they may still fail for a number of
+ reasons.
+
+ In short, a device driver may need to be wary of where buffers are
+ located in physical memory, especially if the DMA mask is less than 32
+ bits.
+
::
dma_addr_t
@@ -246,9 +213,7 @@ Part Id - Streaming DMA mappings
Maps a piece of processor virtual memory so it can be accessed by the
device and returns the DMA address of the memory.
-The direction for both APIs may be converted freely by casting.
-However the dma_API uses a strongly typed enumerator for its
-direction:
+The DMA API uses a strongly typed enumerator for its direction:
======================= =============================================
DMA_NONE no direction (used for debugging)
@@ -259,31 +224,13 @@ DMA_BIDIRECTIONAL direction isn't known
.. note::
- Not all memory regions in a machine can be mapped by this API.
- Further, contiguous kernel virtual space may not be contiguous as
+ Contiguous kernel virtual space may not be contiguous as
physical memory. Since this API does not provide any scatter/gather
capability, it will fail if the user tries to map a non-physically
contiguous piece of memory. For this reason, memory to be mapped by
this API should be obtained from sources which guarantee it to be
physically contiguous (like kmalloc).
- Further, the DMA address of the memory must be within the
- dma_mask of the device (the dma_mask is a bit mask of the
- addressable region for the device, i.e., if the DMA address of
- the memory ANDed with the dma_mask is still equal to the DMA
- address, then the device can perform DMA to the memory). To
- ensure that the memory allocated by kmalloc is within the dma_mask,
- the driver may specify various platform-dependent flags to restrict
- the DMA address range of the allocation (e.g., on x86, GFP_DMA
- guarantees to be within the first 16MB of available DMA addresses,
- as required by ISA devices).
-
- Note also that the above constraints on physical contiguity and
- dma_mask may not apply if the platform has an IOMMU (a device which
- maps an I/O DMA address to a physical memory address). However, to be
- portable, device driver writers may *not* assume that such an IOMMU
- exists.
-
.. warning::
Memory coherency operates at a granularity called the cache
@@ -325,8 +272,7 @@ DMA_BIDIRECTIONAL direction isn't known
enum dma_data_direction direction)
Unmaps the region previously mapped. All the parameters passed in
-must be identical to those passed in (and returned) by the mapping
-API.
+must be identical to those passed to (and returned by) dma_map_single().
::
@@ -376,10 +322,10 @@ action (e.g. reduce current DMA mapping usage or delay and try again later).
dma_map_sg(struct device *dev, struct scatterlist *sg,
int nents, enum dma_data_direction direction)
-Returns: the number of DMA address segments mapped (this may be shorter
-than <nents> passed in if some elements of the scatter/gather list are
-physically or virtually adjacent and an IOMMU maps them with a single
-entry).
+Maps a scatter/gather list for DMA. Returns the number of DMA address segments
+mapped, which may be smaller than <nents> passed in if several consecutive
+sglist entries are merged (e.g. with an IOMMU, or if some adjacent segments
+just happen to be physically contiguous).
Please note that the sg cannot be mapped again if it has been mapped once.
The mapping process is allowed to destroy information in the sg.
@@ -403,9 +349,8 @@ With scatterlists, you use the resulting mapping like this::
where nents is the number of entries in the sglist.
The implementation is free to merge several consecutive sglist entries
-into one (e.g. with an IOMMU, or if several pages just happen to be
-physically contiguous) and returns the actual number of sg entries it
-mapped them to. On failure 0, is returned.
+into one. The returned number is the actual number of sg entries it
+mapped them to. On failure, 0 is returned.
Then you should loop count times (note: this can be less than nents times)
and use sg_dma_address() and sg_dma_len() macros where you previously
@@ -530,6 +475,77 @@ routines, e.g.:::
....
}
+Part Ie - IOVA-based DMA mappings
+---------------------------------
+
+These APIs allow a very efficient mapping when using an IOMMU. They are an
+optional path that requires extra code and are only recommended for drivers
+where DMA mapping performance, or the space usage for storing the DMA addresses
+matter. All the considerations from the previous section apply here as well.
+
+::
+
+ bool dma_iova_try_alloc(struct device *dev, struct dma_iova_state *state,
+ phys_addr_t phys, size_t size);
+
+Is used to try to allocate IOVA space for mapping operation. If it returns
+false this API can't be used for the given device and the normal streaming
+DMA mapping API should be used. The ``struct dma_iova_state`` is allocated
+by the driver and must be kept around until unmap time.
+
+::
+
+ static inline bool dma_use_iova(struct dma_iova_state *state)
+
+Can be used by the driver to check if the IOVA-based API is used after a
+call to dma_iova_try_alloc. This can be useful in the unmap path.
+
+::
+
+ int dma_iova_link(struct device *dev, struct dma_iova_state *state,
+ phys_addr_t phys, size_t offset, size_t size,
+ enum dma_data_direction dir, unsigned long attrs);
+
+Is used to link ranges to the IOVA previously allocated. The start of all
+but the first call to dma_iova_link for a given state must be aligned
+to the DMA merge boundary returned by ``dma_get_merge_boundary())``, and
+the size of all but the last range must be aligned to the DMA merge boundary
+as well.
+
+::
+
+ int dma_iova_sync(struct device *dev, struct dma_iova_state *state,
+ size_t offset, size_t size);
+
+Must be called to sync the IOMMU page tables for IOVA-range mapped by one or
+more calls to ``dma_iova_link()``.
+
+For drivers that use a one-shot mapping, all ranges can be unmapped and the
+IOVA freed by calling:
+
+::
+
+ void dma_iova_destroy(struct device *dev, struct dma_iova_state *state,
+ size_t mapped_len, enum dma_data_direction dir,
+ unsigned long attrs);
+
+Alternatively drivers can dynamically manage the IOVA space by unmapping
+and mapping individual regions. In that case
+
+::
+
+ void dma_iova_unlink(struct device *dev, struct dma_iova_state *state,
+ size_t offset, size_t size, enum dma_data_direction dir,
+ unsigned long attrs);
+
+is used to unmap a range previously mapped, and
+
+::
+
+ void dma_iova_free(struct device *dev, struct dma_iova_state *state);
+
+is used to free the IOVA space. All regions must have been unmapped using
+``dma_iova_unlink()`` before calling ``dma_iova_free()``.
Part II - Non-coherent DMA allocations
--------------------------------------
@@ -704,19 +720,19 @@ memory or doing partial flushes.
of two for easy alignment.
-Part III - Debug drivers use of the DMA-API
+Part III - Debug drivers use of the DMA API
-------------------------------------------
-The DMA-API as described above has some constraints. DMA addresses must be
+The DMA API as described above has some constraints. DMA addresses must be
released with the corresponding function with the same size for example. With
the advent of hardware IOMMUs it becomes more and more important that drivers
do not violate those constraints. In the worst case such a violation can
result in data corruption up to destroyed filesystems.
-To debug drivers and find bugs in the usage of the DMA-API checking code can
+To debug drivers and find bugs in the usage of the DMA API checking code can
be compiled into the kernel which will tell the developer about those
violations. If your architecture supports it you can select the "Enable
-debugging of DMA-API usage" option in your kernel configuration. Enabling this
+debugging of DMA API usage" option in your kernel configuration. Enabling this
option has a performance impact. Do not enable it in production kernels.
If you boot the resulting kernel will contain code which does some bookkeeping
@@ -745,7 +761,7 @@ example warning message may look like this::
[<ffffffff80235177>] find_busiest_group+0x207/0x8a0
[<ffffffff8064784f>] _spin_lock_irqsave+0x1f/0x50
[<ffffffff803c7ea3>] check_unmap+0x203/0x490
- [<ffffffff803c8259>] debug_dma_unmap_page+0x49/0x50
+ [<ffffffff803c8259>] debug_dma_unmap_phys+0x49/0x50
[<ffffffff80485f26>] nv_tx_done_optimized+0xc6/0x2c0
[<ffffffff80486c13>] nv_nic_irq_optimized+0x73/0x2b0
[<ffffffff8026df84>] handle_IRQ_event+0x34/0x70
@@ -755,7 +771,7 @@ example warning message may look like this::
<EOI> <4>---[ end trace f6435a98e2a38c0e ]---
The driver developer can find the driver and the device including a stacktrace
-of the DMA-API call which caused this warning.
+of the DMA API call which caused this warning.
Per default only the first error will result in a warning message. All other
errors will only silently counted. This limitation exist to prevent the code
@@ -763,7 +779,7 @@ from flooding your kernel log. To support debugging a device driver this can
be disabled via debugfs. See the debugfs interface documentation below for
details.
-The debugfs directory for the DMA-API debugging code is called dma-api/. In
+The debugfs directory for the DMA API debugging code is called dma-api/. In
this directory the following files can currently be found:
=============================== ===============================================
@@ -811,7 +827,7 @@ dma-api/driver_filter You can write a name of a driver into this file
If you have this code compiled into your kernel it will be enabled by default.
If you want to boot without the bookkeeping anyway you can provide
-'dma_debug=off' as a boot parameter. This will disable DMA-API debugging.
+'dma_debug=off' as a boot parameter. This will disable DMA API debugging.
Notice that you can not enable it again at runtime. You have to reboot to do
so.
@@ -839,8 +855,14 @@ that a driver may be leaking mappings.
dma-debug interface debug_dma_mapping_error() to debug drivers that fail
to check DMA mapping errors on addresses returned by dma_map_single() and
dma_map_page() interfaces. This interface clears a flag set by
-debug_dma_map_page() to indicate that dma_mapping_error() has been called by
+debug_dma_map_phys() to indicate that dma_mapping_error() has been called by
the driver. When driver does unmap, debug_dma_unmap() checks the flag and if
this flag is still set, prints warning message that includes call trace that
leads up to the unmap. This interface can be called from dma_mapping_error()
routines to enable DMA mapping error check debugging.
+
+Functions and structures
+========================
+
+.. kernel-doc:: include/linux/scatterlist.h
+.. kernel-doc:: lib/scatterlist.c
diff --git a/Documentation/core-api/dma-attributes.rst b/Documentation/core-api/dma-attributes.rst
index 1887d92e8e92..0bdc2be65e57 100644
--- a/Documentation/core-api/dma-attributes.rst
+++ b/Documentation/core-api/dma-attributes.rst
@@ -130,3 +130,21 @@ accesses to DMA buffers in both privileged "supervisor" and unprivileged
subsystem that the buffer is fully accessible at the elevated privilege
level (and ideally inaccessible or at least read-only at the
lesser-privileged levels).
+
+DMA_ATTR_MMIO
+-------------
+
+This attribute indicates the physical address is not normal system
+memory. It may not be used with kmap*()/phys_to_virt()/phys_to_page()
+functions, it may not be cacheable, and access using CPU load/store
+instructions may not be allowed.
+
+Usually this will be used to describe MMIO addresses, or other non-cacheable
+register addresses. When DMA mapping this sort of address we call
+the operation Peer to Peer as a one device is DMA'ing to another device.
+For PCI devices the p2pdma APIs must be used to determine if
+DMA_ATTR_MMIO is appropriate.
+
+For architectures that require cache flushing for DMA coherence
+DMA_ATTR_MMIO will not perform any cache flushing. The address
+provided must never be mapped cacheable into the CPU.
diff --git a/Documentation/core-api/entry.rst b/Documentation/core-api/entry.rst
index e12f22ab33c7..71d8eedc0549 100644
--- a/Documentation/core-api/entry.rst
+++ b/Documentation/core-api/entry.rst
@@ -18,7 +18,7 @@ exceptions`_, `NMI and NMI-like exceptions`_.
Non-instrumentable code - noinstr
---------------------------------
-Most instrumentation facilities depend on RCU, so intrumentation is prohibited
+Most instrumentation facilities depend on RCU, so instrumentation is prohibited
for entry code before RCU starts watching and exit code after RCU stops
watching. In addition, many architectures must save and restore register state,
which means that (for example) a breakpoint in the breakpoint entry code would
@@ -105,7 +105,7 @@ has to do extra work between the various steps. In such cases it has to
ensure that enter_from_user_mode() is called first on entry and
exit_to_user_mode() is called last on exit.
-Do not nest syscalls. Nested systcalls will cause RCU and/or context tracking
+Do not nest syscalls. Nested syscalls will cause RCU and/or context tracking
to print a warning.
KVM
@@ -115,8 +115,8 @@ Entering or exiting guest mode is very similar to syscalls. From the host
kernel point of view the CPU goes off into user space when entering the
guest and returns to the kernel on exit.
-kvm_guest_enter_irqoff() is a KVM-specific variant of exit_to_user_mode()
-and kvm_guest_exit_irqoff() is the KVM variant of enter_from_user_mode().
+guest_state_enter_irqoff() is a KVM-specific variant of exit_to_user_mode()
+and guest_state_exit_irqoff() is the KVM variant of enter_from_user_mode().
The state operations have the same ordering.
Task work handling is done separately for guest at the boundary of the
diff --git a/Documentation/core-api/floating-point.rst b/Documentation/core-api/floating-point.rst
new file mode 100644
index 000000000000..a8d0d4b05052
--- /dev/null
+++ b/Documentation/core-api/floating-point.rst
@@ -0,0 +1,78 @@
+.. SPDX-License-Identifier: GPL-2.0+
+
+Floating-point API
+==================
+
+Kernel code is normally prohibited from using floating-point (FP) registers or
+instructions, including the C float and double data types. This rule reduces
+system call overhead, because the kernel does not need to save and restore the
+userspace floating-point register state.
+
+However, occasionally drivers or library functions may need to include FP code.
+This is supported by isolating the functions containing FP code to a separate
+translation unit (a separate source file), and saving/restoring the FP register
+state around calls to those functions. This creates "critical sections" of
+floating-point usage.
+
+The reason for this isolation is to prevent the compiler from generating code
+touching the FP registers outside these critical sections. Compilers sometimes
+use FP registers to optimize inlined ``memcpy`` or variable assignment, as
+floating-point registers may be wider than general-purpose registers.
+
+Usability of floating-point code within the kernel is architecture-specific.
+Additionally, because a single kernel may be configured to support platforms
+both with and without a floating-point unit, FPU availability must be checked
+both at build time and at run time.
+
+Several architectures implement the generic kernel floating-point API from
+``linux/fpu.h``, as described below. Some other architectures implement their
+own unique APIs, which are documented separately.
+
+Build-time API
+--------------
+
+Floating-point code may be built if the option ``ARCH_HAS_KERNEL_FPU_SUPPORT``
+is enabled. For C code, such code must be placed in a separate file, and that
+file must have its compilation flags adjusted using the following pattern::
+
+ CFLAGS_foo.o += $(CC_FLAGS_FPU)
+ CFLAGS_REMOVE_foo.o += $(CC_FLAGS_NO_FPU)
+
+Architectures are expected to define one or both of these variables in their
+top-level Makefile as needed. For example::
+
+ CC_FLAGS_FPU := -mhard-float
+
+or::
+
+ CC_FLAGS_NO_FPU := -msoft-float
+
+Normal kernel code is assumed to use the equivalent of ``CC_FLAGS_NO_FPU``.
+
+Runtime API
+-----------
+
+The runtime API is provided in ``linux/fpu.h``. This header cannot be included
+from files implementing FP code (those with their compilation flags adjusted as
+above). Instead, it must be included when defining the FP critical sections.
+
+.. c:function:: bool kernel_fpu_available( void )
+
+ This function reports if floating-point code can be used on this CPU or
+ platform. The value returned by this function is not expected to change
+ at runtime, so it only needs to be called once, not before every
+ critical section.
+
+.. c:function:: void kernel_fpu_begin( void )
+ void kernel_fpu_end( void )
+
+ These functions create a floating-point critical section. It is only
+ valid to call ``kernel_fpu_begin()`` after a previous call to
+ ``kernel_fpu_available()`` returned ``true``. These functions are only
+ guaranteed to be callable from (preemptible or non-preemptible) process
+ context.
+
+ Preemption may be disabled inside critical sections, so their size
+ should be minimized. They are *not* required to be reentrant. If the
+ caller expects to nest critical sections, it must implement its own
+ reference counting.
diff --git a/Documentation/core-api/folio_queue.rst b/Documentation/core-api/folio_queue.rst
new file mode 100644
index 000000000000..b7628896d2b6
--- /dev/null
+++ b/Documentation/core-api/folio_queue.rst
@@ -0,0 +1,209 @@
+.. SPDX-License-Identifier: GPL-2.0+
+
+===========
+Folio Queue
+===========
+
+:Author: David Howells <dhowells@redhat.com>
+
+.. Contents:
+
+ * Overview
+ * Initialisation
+ * Adding and removing folios
+ * Querying information about a folio
+ * Querying information about a folio_queue
+ * Folio queue iteration
+ * Folio marks
+ * Lockless simultaneous production/consumption issues
+
+
+Overview
+========
+
+The folio_queue struct forms a single segment in a segmented list of folios
+that can be used to form an I/O buffer. As such, the list can be iterated over
+using the ITER_FOLIOQ iov_iter type.
+
+The publicly accessible members of the structure are::
+
+ struct folio_queue {
+ struct folio_queue *next;
+ struct folio_queue *prev;
+ ...
+ };
+
+A pair of pointers are provided, ``next`` and ``prev``, that point to the
+segments on either side of the segment being accessed. Whilst this is a
+doubly-linked list, it is intentionally not a circular list; the outward
+sibling pointers in terminal segments should be NULL.
+
+Each segment in the list also stores:
+
+ * an ordered sequence of folio pointers,
+ * the size of each folio and
+ * three 1-bit marks per folio,
+
+but these should not be accessed directly as the underlying data structure may
+change, but rather the access functions outlined below should be used.
+
+The facility can be made accessible by::
+
+ #include <linux/folio_queue.h>
+
+and to use the iterator::
+
+ #include <linux/uio.h>
+
+
+Initialisation
+==============
+
+A segment should be initialised by calling::
+
+ void folioq_init(struct folio_queue *folioq);
+
+with a pointer to the segment to be initialised. Note that this will not
+necessarily initialise all the folio pointers, so care must be taken to check
+the number of folios added.
+
+
+Adding and removing folios
+==========================
+
+Folios can be set in the next unused slot in a segment struct by calling one
+of::
+
+ unsigned int folioq_append(struct folio_queue *folioq,
+ struct folio *folio);
+
+ unsigned int folioq_append_mark(struct folio_queue *folioq,
+ struct folio *folio);
+
+Both functions update the stored folio count, store the folio and note its
+size. The second function also sets the first mark for the folio added. Both
+functions return the number of the slot used. [!] Note that no attempt is made
+to check that the capacity wasn't overrun and the list will not be extended
+automatically.
+
+A folio can be excised by calling::
+
+ void folioq_clear(struct folio_queue *folioq, unsigned int slot);
+
+This clears the slot in the array and also clears all the marks for that folio,
+but doesn't change the folio count - so future accesses of that slot must check
+if the slot is occupied.
+
+
+Querying information about a folio
+==================================
+
+Information about the folio in a particular slot may be queried by the
+following function::
+
+ struct folio *folioq_folio(const struct folio_queue *folioq,
+ unsigned int slot);
+
+If a folio has not yet been set in that slot, this may yield an undefined
+pointer. The size of the folio in a slot may be queried with either of::
+
+ unsigned int folioq_folio_order(const struct folio_queue *folioq,
+ unsigned int slot);
+
+ size_t folioq_folio_size(const struct folio_queue *folioq,
+ unsigned int slot);
+
+The first function returns the size as an order and the second as a number of
+bytes.
+
+
+Querying information about a folio_queue
+========================================
+
+Information may be retrieved about a particular segment with the following
+functions::
+
+ unsigned int folioq_nr_slots(const struct folio_queue *folioq);
+
+ unsigned int folioq_count(struct folio_queue *folioq);
+
+ bool folioq_full(struct folio_queue *folioq);
+
+The first function returns the maximum capacity of a segment. It must not be
+assumed that this won't vary between segments. The second returns the number
+of folios added to a segments and the third is a shorthand to indicate if the
+segment has been filled to capacity.
+
+Not that the count and fullness are not affected by clearing folios from the
+segment. These are more about indicating how many slots in the array have been
+initialised, and it assumed that slots won't get reused, but rather the segment
+will get discarded as the queue is consumed.
+
+
+Folio marks
+===========
+
+Folios within a queue can also have marks assigned to them. These marks can be
+used to note information such as if a folio needs folio_put() calling upon it.
+There are three marks available to be set for each folio.
+
+The marks can be set by::
+
+ void folioq_mark(struct folio_queue *folioq, unsigned int slot);
+ void folioq_mark2(struct folio_queue *folioq, unsigned int slot);
+
+Cleared by::
+
+ void folioq_unmark(struct folio_queue *folioq, unsigned int slot);
+ void folioq_unmark2(struct folio_queue *folioq, unsigned int slot);
+
+And the marks can be queried by::
+
+ bool folioq_is_marked(const struct folio_queue *folioq, unsigned int slot);
+ bool folioq_is_marked2(const struct folio_queue *folioq, unsigned int slot);
+
+The marks can be used for any purpose and are not interpreted by this API.
+
+
+Folio queue iteration
+=====================
+
+A list of segments may be iterated over using the I/O iterator facility using
+an ``iov_iter`` iterator of ``ITER_FOLIOQ`` type. The iterator may be
+initialised with::
+
+ void iov_iter_folio_queue(struct iov_iter *i, unsigned int direction,
+ const struct folio_queue *folioq,
+ unsigned int first_slot, unsigned int offset,
+ size_t count);
+
+This may be told to start at a particular segment, slot and offset within a
+queue. The iov iterator functions will follow the next pointers when advancing
+and prev pointers when reverting when needed.
+
+
+Lockless simultaneous production/consumption issues
+===================================================
+
+If properly managed, the list can be extended by the producer at the head end
+and shortened by the consumer at the tail end simultaneously without the need
+to take locks. The ITER_FOLIOQ iterator inserts appropriate barriers to aid
+with this.
+
+Care must be taken when simultaneously producing and consuming a list. If the
+last segment is reached and the folios it refers to are entirely consumed by
+the IOV iterators, an iov_iter struct will be left pointing to the last segment
+with a slot number equal to the capacity of that segment. The iterator will
+try to continue on from this if there's another segment available when it is
+used again, but care must be taken lest the segment got removed and freed by
+the consumer before the iterator was advanced.
+
+It is recommended that the queue always contain at least one segment, even if
+that segment has never been filled or is entirely spent. This prevents the
+head and tail pointers from collapsing.
+
+
+API Function Reference
+======================
+
+.. kernel-doc:: include/linux/folio_queue.h
diff --git a/Documentation/core-api/genericirq.rst b/Documentation/core-api/genericirq.rst
index 4a460639ab1c..582bde9bf5a9 100644
--- a/Documentation/core-api/genericirq.rst
+++ b/Documentation/core-api/genericirq.rst
@@ -210,7 +210,7 @@ implemented (simplified excerpt)::
}
}
- noop(struct irq_data *data))
+ noop(struct irq_data *data)
{
}
diff --git a/Documentation/core-api/gfp_mask-from-fs-io.rst b/Documentation/core-api/gfp_mask-from-fs-io.rst
index e7c32a8de126..858b2fbcb36c 100644
--- a/Documentation/core-api/gfp_mask-from-fs-io.rst
+++ b/Documentation/core-api/gfp_mask-from-fs-io.rst
@@ -55,14 +55,16 @@ scope.
What about __vmalloc(GFP_NOFS)
==============================
-vmalloc doesn't support GFP_NOFS semantic because there are hardcoded
-GFP_KERNEL allocations deep inside the allocator which are quite non-trivial
-to fix up. That means that calling ``vmalloc`` with GFP_NOFS/GFP_NOIO is
-almost always a bug. The good news is that the NOFS/NOIO semantic can be
-achieved by the scope API.
+Since v5.17, and specifically after the commit 451769ebb7e79 ("mm/vmalloc:
+alloc GFP_NO{FS,IO} for vmalloc"), GFP_NOFS/GFP_NOIO are now supported in
+``[k]vmalloc`` by implicitly using scope API.
+
+In earlier kernels ``vmalloc`` didn't support GFP_NOFS semantic because there
+were hardcoded GFP_KERNEL allocations deep inside the allocator. That means
+that calling ``vmalloc`` with GFP_NOFS/GFP_NOIO was almost always a bug.
In the ideal world, upper layers should already mark dangerous contexts
-and so no special care is required and vmalloc should be called without
-any problems. Sometimes if the context is not really clear or there are
-layering violations then the recommended way around that is to wrap ``vmalloc``
-by the scope API with a comment explaining the problem.
+and so no special care is required and ``vmalloc`` should be called without any
+problems. Sometimes if the context is not really clear or there are layering
+violations then the recommended way around that (on pre-v5.17 kernels) is to
+wrap ``vmalloc`` by the scope API with a comment explaining the problem.
diff --git a/Documentation/core-api/index.rst b/Documentation/core-api/index.rst
index 7a3a08d81f11..5eb0fbbbc323 100644
--- a/Documentation/core-api/index.rst
+++ b/Documentation/core-api/index.rst
@@ -24,6 +24,7 @@ it.
printk-index
symbol-namespaces
asm-annotations
+ real-time/index
Data structures and low-level utilities
=======================================
@@ -35,7 +36,9 @@ Library functionality that is used throughout the kernel.
kobject
kref
+ cleanup
assoc_array
+ folio_queue
xarray
maple_tree
idr
@@ -48,6 +51,11 @@ Library functionality that is used throughout the kernel.
errseq
wrappers/atomic_t
wrappers/atomic_bitops
+ floating-point
+ union_find
+ min_heap
+ parser
+ list
Low level entry and exit
========================
@@ -102,11 +110,14 @@ more memory-management documentation in Documentation/mm/index.rst.
dma-api-howto
dma-attributes
dma-isa-lpc
+ swiotlb
mm-api
+ cgroup
genalloc
pin_user_pages
boot-time-mm
gfp_mask-from-fs-io
+ kho/index
Interfaces for kernel debugging
===============================
@@ -127,6 +138,7 @@ Documents that don't fit elsewhere or which have yet to be categorized.
:maxdepth: 1
librs
+ liveupdate
netlink
.. only:: subproject and html
diff --git a/Documentation/core-api/irq/concepts.rst b/Documentation/core-api/irq/concepts.rst
index 4273806a606b..7c4564f3cbdf 100644
--- a/Documentation/core-api/irq/concepts.rst
+++ b/Documentation/core-api/irq/concepts.rst
@@ -2,23 +2,24 @@
What is an IRQ?
===============
-An IRQ is an interrupt request from a device.
-Currently they can come in over a pin, or over a packet.
-Several devices may be connected to the same pin thus
-sharing an IRQ.
+An IRQ is an interrupt request from a device. Currently, they can come
+in over a pin, or over a packet. Several devices may be connected to
+the same pin thus sharing an IRQ. Such as on legacy PCI bus: All devices
+typically share 4 lanes/pins. Note that each device can request an
+interrupt on each of the lanes.
An IRQ number is a kernel identifier used to talk about a hardware
-interrupt source. Typically this is an index into the global irq_desc
-array, but except for what linux/interrupt.h implements the details
-are architecture specific.
+interrupt source. Typically, this is an index into the global irq_desc
+array or sparse_irqs tree. But except for what linux/interrupt.h
+implements, the details are architecture specific.
An IRQ number is an enumeration of the possible interrupt sources on a
-machine. Typically what is enumerated is the number of input pins on
-all of the interrupt controller in the system. In the case of ISA
-what is enumerated are the 16 input pins on the two i8259 interrupt
-controllers.
+machine. Typically, what is enumerated is the number of input pins on
+all of the interrupt controllers in the system. In the case of ISA,
+what is enumerated are the 8 input pins on each of the two i8259
+interrupt controllers.
Architectures can assign additional meaning to the IRQ numbers, and
-are encouraged to in the case where there is any manual configuration
-of the hardware involved. The ISA IRQs are a classic example of
+are encouraged to in the case where there is any manual configuration
+of the hardware involved. The ISA IRQs are a classic example of
assigning this kind of additional meaning.
diff --git a/Documentation/core-api/irq/irq-affinity.rst b/Documentation/core-api/irq/irq-affinity.rst
index 29da5000836a..9cb460cf60b6 100644
--- a/Documentation/core-api/irq/irq-affinity.rst
+++ b/Documentation/core-api/irq/irq-affinity.rst
@@ -9,9 +9,9 @@ ChangeLog:
/proc/irq/IRQ#/smp_affinity and /proc/irq/IRQ#/smp_affinity_list specify
which target CPUs are permitted for a given IRQ source. It's a bitmask
-(smp_affinity) or cpu list (smp_affinity_list) of allowed CPUs. It's not
+(smp_affinity) or CPU list (smp_affinity_list) of allowed CPUs. It's not
allowed to turn off all CPUs, and if an IRQ controller does not support
-IRQ affinity then the value will not change from the default of all cpus.
+IRQ affinity then the value will not change from the default of all CPUs.
/proc/irq/default_smp_affinity specifies default affinity mask that applies
to all non-active IRQs. Once IRQ is allocated/activated its affinity bitmask
@@ -60,7 +60,7 @@ Now lets restrict that IRQ to CPU(4-7).
This time around IRQ44 was delivered only to the last four processors.
i.e counters for the CPU0-3 did not change.
-Here is an example of limiting that same irq (44) to cpus 1024 to 1031::
+Here is an example of limiting that same IRQ (44) to CPUs 1024 to 1031::
[root@moon 44]# echo 1024-1031 > smp_affinity_list
[root@moon 44]# cat smp_affinity_list
diff --git a/Documentation/core-api/irq/irq-domain.rst b/Documentation/core-api/irq/irq-domain.rst
index f88a6ee67a35..68eb2612e8a4 100644
--- a/Documentation/core-api/irq/irq-domain.rst
+++ b/Documentation/core-api/irq/irq-domain.rst
@@ -1,75 +1,91 @@
===============================================
-The irq_domain interrupt number mapping library
+The irq_domain Interrupt Number Mapping Library
===============================================
The current design of the Linux kernel uses a single large number
-space where each separate IRQ source is assigned a different number.
-This is simple when there is only one interrupt controller, but in
-systems with multiple interrupt controllers the kernel must ensure
+space where each separate IRQ source is assigned a unique number.
+This is simple when there is only one interrupt controller. But in
+systems with multiple interrupt controllers, the kernel must ensure
that each one gets assigned non-overlapping allocations of Linux
IRQ numbers.
The number of interrupt controllers registered as unique irqchips
-show a rising tendency: for example subdrivers of different kinds
+shows a rising tendency. For example, subdrivers of different kinds
such as GPIO controllers avoid reimplementing identical callback
mechanisms as the IRQ core system by modelling their interrupt
-handlers as irqchips, i.e. in effect cascading interrupt controllers.
+handlers as irqchips. I.e. in effect cascading interrupt controllers.
-Here the interrupt number loose all kind of correspondence to
-hardware interrupt numbers: whereas in the past, IRQ numbers could
-be chosen so they matched the hardware IRQ line into the root
-interrupt controller (i.e. the component actually fireing the
-interrupt line to the CPU) nowadays this number is just a number.
+So in the past, IRQ numbers could be chosen so that they match the
+hardware IRQ line into the root interrupt controller (i.e. the
+component actually firing the interrupt line to the CPU). Nowadays,
+this number is just a number and the number has no
+relationship to hardware interrupt numbers.
-For this reason we need a mechanism to separate controller-local
-interrupt numbers, called hardware irq's, from Linux IRQ numbers.
+For this reason, we need a mechanism to separate controller-local
+interrupt numbers, called hardware IRQs, from Linux IRQ numbers.
The irq_alloc_desc*() and irq_free_desc*() APIs provide allocation of
-irq numbers, but they don't provide any support for reverse mapping of
+IRQ numbers, but they don't provide any support for reverse mapping of
the controller-local IRQ (hwirq) number into the Linux IRQ number
space.
-The irq_domain library adds mapping between hwirq and IRQ numbers on
-top of the irq_alloc_desc*() API. An irq_domain to manage mapping is
-preferred over interrupt controller drivers open coding their own
+The irq_domain library adds a mapping between hwirq and IRQ numbers on
+top of the irq_alloc_desc*() API. An irq_domain to manage the mapping
+is preferred over interrupt controller drivers open coding their own
reverse mapping scheme.
-irq_domain also implements translation from an abstract irq_fwspec
-structure to hwirq numbers (Device Tree and ACPI GSI so far), and can
-be easily extended to support other IRQ topology data sources.
+irq_domain also implements a translation from an abstract struct
+irq_fwspec to hwirq numbers (Device Tree, non-DT firmware node, ACPI
+GSI, and software node so far), and can be easily extended to support
+other IRQ topology data sources. The implementation is performed
+without any extra platform support code.
-irq_domain usage
+irq_domain Usage
================
-
-An interrupt controller driver creates and registers an irq_domain by
-calling one of the irq_domain_add_*() or irq_domain_create_*() functions
-(each mapping method has a different allocator function, more on that later).
-The function will return a pointer to the irq_domain on success. The caller
-must provide the allocator function with an irq_domain_ops structure.
+struct irq_domain could be defined as an irq domain controller. That
+is, it handles the mapping between hardware and virtual interrupt
+numbers for a given interrupt domain. The domain structure is
+generally created by the PIC code for a given PIC instance (though a
+domain can cover more than one PIC if they have a flat number model).
+It is the domain callbacks that are responsible for setting the
+irq_chip on a given irq_desc after it has been mapped.
+
+The host code and data structures use a fwnode_handle pointer to
+identify the domain. In some cases, and in order to preserve source
+code compatibility, this fwnode pointer is "upgraded" to a DT
+device_node. For those firmware infrastructures that do not provide a
+unique identifier for an interrupt controller, the irq_domain code
+offers a fwnode allocator.
+
+An interrupt controller driver creates and registers a struct irq_domain
+by calling one of the irq_domain_create_*() functions (each mapping
+method has a different allocator function, more on that later). The
+function will return a pointer to the struct irq_domain on success. The
+caller must provide the allocator function with a struct irq_domain_ops
+pointer.
In most cases, the irq_domain will begin empty without any mappings
between hwirq and IRQ numbers. Mappings are added to the irq_domain
by calling irq_create_mapping() which accepts the irq_domain and a
-hwirq number as arguments. If a mapping for the hwirq doesn't already
-exist then it will allocate a new Linux irq_desc, associate it with
-the hwirq, and call the .map() callback so the driver can perform any
-required hardware setup.
+hwirq number as arguments. If a mapping for the hwirq doesn't already
+exist, irq_create_mapping() allocates a new Linux irq_desc, associates
+it with the hwirq, and calls the :c:member:`irq_domain_ops.map()`
+callback. In there, the driver can perform any required hardware
+setup.
Once a mapping has been established, it can be retrieved or used via a
variety of methods:
- irq_resolve_mapping() returns a pointer to the irq_desc structure
- for a given domain and hwirq number, and NULL if there was no
+ for a given domain and hwirq number, or NULL if there was no
mapping.
- irq_find_mapping() returns a Linux IRQ number for a given domain and
- hwirq number, and 0 if there was no mapping
-- irq_linear_revmap() is now identical to irq_find_mapping(), and is
- deprecated
+ hwirq number, or 0 if there was no mapping
- generic_handle_domain_irq() handles an interrupt described by a
domain and a hwirq number
-Note that irq domain lookups must happen in contexts that are
-compatible with a RCU read-side critical section.
+Note that irq_domain lookups must happen in contexts that are
+compatible with an RCU read-side critical section.
The irq_create_mapping() function must be called *at least once*
before any call to irq_find_mapping(), lest the descriptor will not
@@ -77,13 +93,14 @@ be allocated.
If the driver has the Linux IRQ number or the irq_data pointer, and
needs to know the associated hwirq number (such as in the irq_chip
-callbacks) then it can be directly obtained from irq_data->hwirq.
+callbacks) then it can be directly obtained from
+:c:member:`irq_data.hwirq`.
-Types of irq_domain mappings
+Types of irq_domain Mappings
============================
There are several mechanisms available for reverse mapping from hwirq
-to Linux irq, and each mechanism uses a different allocation function.
+to Linux IRQ, and each mechanism uses a different allocation function.
Which reverse map type should be used depends on the use case. Each
of the reverse map types are described below:
@@ -92,48 +109,36 @@ Linear
::
- irq_domain_add_linear()
irq_domain_create_linear()
-The linear reverse map maintains a fixed size table indexed by the
+The linear reverse map maintains a fixed-size table indexed by the
hwirq number. When a hwirq is mapped, an irq_desc is allocated for
the hwirq, and the IRQ number is stored in the table.
The Linear map is a good choice when the maximum number of hwirqs is
fixed and a relatively small number (~ < 256). The advantages of this
-map are fixed time lookup for IRQ numbers, and irq_descs are only
+map are fixed-time lookup for IRQ numbers, and irq_descs are only
allocated for in-use IRQs. The disadvantage is that the table must be
as large as the largest possible hwirq number.
-irq_domain_add_linear() and irq_domain_create_linear() are functionally
-equivalent, except for the first argument is different - the former
-accepts an Open Firmware specific 'struct device_node', while the latter
-accepts a more general abstraction 'struct fwnode_handle'.
-
-The majority of drivers should use the linear map.
+The majority of drivers should use the Linear map.
Tree
----
::
- irq_domain_add_tree()
irq_domain_create_tree()
The irq_domain maintains a radix tree map from hwirq numbers to Linux
IRQs. When an hwirq is mapped, an irq_desc is allocated and the
hwirq is used as the lookup key for the radix tree.
-The tree map is a good choice if the hwirq number can be very large
+The Tree map is a good choice if the hwirq number can be very large
since it doesn't need to allocate a table as large as the largest
hwirq number. The disadvantage is that hwirq to IRQ number lookup is
dependent on how many entries are in the table.
-irq_domain_add_tree() and irq_domain_create_tree() are functionally
-equivalent, except for the first argument is different - the former
-accepts an Open Firmware specific 'struct device_node', while the latter
-accepts a more general abstraction 'struct fwnode_handle'.
-
Very few drivers should need this mapping.
No Map
@@ -141,7 +146,7 @@ No Map
::
- irq_domain_add_nomap()
+ irq_domain_create_nomap()
The No Map mapping is to be used when the hwirq number is
programmable in the hardware. In this case it is best to program the
@@ -159,17 +164,15 @@ Legacy
::
- irq_domain_add_simple()
- irq_domain_add_legacy()
irq_domain_create_simple()
irq_domain_create_legacy()
The Legacy mapping is a special case for drivers that already have a
range of irq_descs allocated for the hwirqs. It is used when the
-driver cannot be immediately converted to use the linear mapping. For
+driver cannot be immediately converted to use the Linear mapping. For
example, many embedded system board support files use a set of #defines
for IRQ numbers that are passed to struct device registrations. In that
-case the Linux IRQ numbers cannot be dynamically assigned and the legacy
+case the Linux IRQ numbers cannot be dynamically assigned and the Legacy
mapping should be used.
As the name implies, the \*_legacy() functions are deprecated and only
@@ -177,25 +180,25 @@ exist to ease the support of ancient platforms. No new users should be
added. Same goes for the \*_simple() functions when their use results
in the legacy behaviour.
-The legacy map assumes a contiguous range of IRQ numbers has already
+The Legacy map assumes a contiguous range of IRQ numbers has already
been allocated for the controller and that the IRQ number can be
calculated by adding a fixed offset to the hwirq number, and
visa-versa. The disadvantage is that it requires the interrupt
controller to manage IRQ allocations and it requires an irq_desc to be
allocated for every hwirq, even if it is unused.
-The legacy map should only be used if fixed IRQ mappings must be
-supported. For example, ISA controllers would use the legacy map for
+The Legacy map should only be used if fixed IRQ mappings must be
+supported. For example, ISA controllers would use the Legacy map for
mapping Linux IRQs 0-15 so that existing ISA drivers get the correct IRQ
numbers.
-Most users of legacy mappings should use irq_domain_add_simple() or
-irq_domain_create_simple() which will use a legacy domain only if an IRQ range
-is supplied by the system and will otherwise use a linear domain mapping.
-The semantics of this call are such that if an IRQ range is specified then
-descriptors will be allocated on-the-fly for it, and if no range is
-specified it will fall through to irq_domain_add_linear() or
-irq_domain_create_linear() which means *no* irq descriptors will be allocated.
+Most users of legacy mappings should use irq_domain_create_simple()
+which will use a legacy domain only if an IRQ range is supplied by the
+system and will otherwise use a linear domain mapping. The semantics of
+this call are such that if an IRQ range is specified then descriptors
+will be allocated on-the-fly for it, and if no range is specified it
+will fall through to irq_domain_create_linear() which means *no* IRQ
+descriptors will be allocated.
A typical use case for simple domains is where an irqchip provider
is supporting both dynamic and static IRQ assignments.
@@ -206,18 +209,12 @@ that the driver using the simple domain call irq_create_mapping()
before any irq_find_mapping() since the latter will actually work
for the static IRQ assignment case.
-irq_domain_add_simple() and irq_domain_create_simple() as well as
-irq_domain_add_legacy() and irq_domain_create_legacy() are functionally
-equivalent, except for the first argument is different - the former
-accepts an Open Firmware specific 'struct device_node', while the latter
-accepts a more general abstraction 'struct fwnode_handle'.
-
-Hierarchy IRQ domain
+Hierarchy IRQ Domain
--------------------
On some architectures, there may be multiple interrupt controllers
involved in delivering an interrupt from the device to the target CPU.
-Let's look at a typical interrupt delivering path on x86 platforms::
+Let's look at a typical interrupt delivery path on x86 platforms::
Device --> IOAPIC -> Interrupt remapping Controller -> Local APIC -> CPU
@@ -230,8 +227,8 @@ There are three interrupt controllers involved:
To support such a hardware topology and make software architecture match
hardware architecture, an irq_domain data structure is built for each
interrupt controller and those irq_domains are organized into hierarchy.
-When building irq_domain hierarchy, the irq_domain near to the device is
-child and the irq_domain near to CPU is parent. So a hierarchy structure
+When building irq_domain hierarchy, the irq_domain nearest the device is
+child and the irq_domain nearest the CPU is parent. So a hierarchy structure
as below will be built for the example above::
CPU Vector irq_domain (root irq_domain to manage CPU vectors)
@@ -253,20 +250,40 @@ There are four major interfaces to use hierarchy irq_domain:
4) irq_domain_deactivate_irq(): deactivate interrupt controller hardware
to stop delivering the interrupt.
-Following changes are needed to support hierarchy irq_domain:
+The following is needed to support hierarchy irq_domain:
-1) a new field 'parent' is added to struct irq_domain; it's used to
+1) The :c:member:`parent` field in struct irq_domain is used to
maintain irq_domain hierarchy information.
-2) a new field 'parent_data' is added to struct irq_data; it's used to
- build hierarchy irq_data to match hierarchy irq_domains. The irq_data
- is used to store irq_domain pointer and hardware irq number.
-3) new callbacks are added to struct irq_domain_ops to support hierarchy
- irq_domain operations.
-
-With support of hierarchy irq_domain and hierarchy irq_data ready, an
-irq_domain structure is built for each interrupt controller, and an
+2) The :c:member:`parent_data` field in struct irq_data is used to
+ build hierarchy irq_data to match hierarchy irq_domains. The
+ irq_data is used to store irq_domain pointer and hardware irq
+ number.
+3) The :c:member:`alloc()`, :c:member:`free()`, and other callbacks in
+ struct irq_domain_ops to support hierarchy irq_domain operations.
+
+With the support of hierarchy irq_domain and hierarchy irq_data ready,
+an irq_domain structure is built for each interrupt controller, and an
irq_data structure is allocated for each irq_domain associated with an
-IRQ. Now we could go one step further to support stacked(hierarchy)
+IRQ.
+
+For an interrupt controller driver to support hierarchy irq_domain, it
+needs to:
+
+1) Implement irq_domain_ops.alloc() and irq_domain_ops.free()
+2) Optionally, implement irq_domain_ops.activate() and
+ irq_domain_ops.deactivate().
+3) Optionally, implement an irq_chip to manage the interrupt controller
+ hardware.
+4) There is no need to implement irq_domain_ops.map() and
+ irq_domain_ops.unmap(). They are unused with hierarchy irq_domain.
+
+Note the hierarchy irq_domain is in no way x86-specific, and is
+heavily used to support other architectures, such as ARM, ARM64 etc.
+
+Stacked irq_chip
+~~~~~~~~~~~~~~~~
+
+Now, we could go one step further to support stacked (hierarchy)
irq_chip. That is, an irq_chip is associated with each irq_data along
the hierarchy. A child irq_chip may implement a required action by
itself or by cooperating with its parent irq_chip.
@@ -276,22 +293,28 @@ with the hardware managed by itself and may ask for services from its
parent irq_chip when needed. So we could achieve a much cleaner
software architecture.
-For an interrupt controller driver to support hierarchy irq_domain, it
-needs to:
-
-1) Implement irq_domain_ops.alloc and irq_domain_ops.free
-2) Optionally implement irq_domain_ops.activate and
- irq_domain_ops.deactivate.
-3) Optionally implement an irq_chip to manage the interrupt controller
- hardware.
-4) No need to implement irq_domain_ops.map and irq_domain_ops.unmap,
- they are unused with hierarchy irq_domain.
-
-Hierarchy irq_domain is in no way x86 specific, and is heavily used to
-support other architectures, such as ARM, ARM64 etc.
-
Debugging
=========
Most of the internals of the IRQ subsystem are exposed in debugfs by
turning CONFIG_GENERIC_IRQ_DEBUGFS on.
+
+Structures and Public Functions Provided
+========================================
+
+This chapter contains the autogenerated documentation of the structures
+and exported kernel API functions which are used for IRQ domains.
+
+.. kernel-doc:: include/linux/irqdomain.h
+
+.. kernel-doc:: kernel/irq/irqdomain.c
+ :export:
+
+Internal Functions Provided
+===========================
+
+This chapter contains the autogenerated documentation of the internal
+functions.
+
+.. kernel-doc:: kernel/irq/irqdomain.c
+ :internal:
diff --git a/Documentation/core-api/kernel-api.rst b/Documentation/core-api/kernel-api.rst
index ae92a2571388..e8211c4ca662 100644
--- a/Documentation/core-api/kernel-api.rst
+++ b/Documentation/core-api/kernel-api.rst
@@ -3,12 +3,6 @@ The Linux Kernel API
====================
-List Management Functions
-=========================
-
-.. kernel-doc:: include/linux/list.h
- :internal:
-
Basic C Library Functions
=========================
@@ -136,26 +130,28 @@ Arithmetic Overflow Checking
CRC Functions
-------------
-.. kernel-doc:: lib/crc4.c
+.. kernel-doc:: lib/crc/crc4.c
:export:
-.. kernel-doc:: lib/crc7.c
+.. kernel-doc:: lib/crc/crc7.c
:export:
-.. kernel-doc:: lib/crc8.c
+.. kernel-doc:: lib/crc/crc8.c
:export:
-.. kernel-doc:: lib/crc16.c
+.. kernel-doc:: lib/crc/crc16.c
:export:
-.. kernel-doc:: lib/crc32.c
-
-.. kernel-doc:: lib/crc-ccitt.c
+.. kernel-doc:: lib/crc/crc-ccitt.c
:export:
-.. kernel-doc:: lib/crc-itu-t.c
+.. kernel-doc:: lib/crc/crc-itu-t.c
:export:
+.. kernel-doc:: include/linux/crc32.h
+
+.. kernel-doc:: include/linux/crc64.h
+
Base 2 log and power Functions
------------------------------
diff --git a/Documentation/core-api/kho/bindings/kho.yaml b/Documentation/core-api/kho/bindings/kho.yaml
new file mode 100644
index 000000000000..11e8ab7b219d
--- /dev/null
+++ b/Documentation/core-api/kho/bindings/kho.yaml
@@ -0,0 +1,43 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+title: Kexec HandOver (KHO) root tree
+
+maintainers:
+ - Mike Rapoport <rppt@kernel.org>
+ - Changyuan Lyu <changyuanl@google.com>
+
+description: |
+ System memory preserved by KHO across kexec.
+
+properties:
+ compatible:
+ enum:
+ - kho-v1
+
+ preserved-memory-map:
+ description: |
+ physical address (u64) of an in-memory structure describing all preserved
+ folios and memory ranges.
+
+patternProperties:
+ "$[0-9a-f_]+^":
+ $ref: sub-fdt.yaml#
+ description: physical address of a KHO user's own FDT.
+
+required:
+ - compatible
+ - preserved-memory-map
+
+additionalProperties: false
+
+examples:
+ - |
+ kho {
+ compatible = "kho-v1";
+ preserved-memory-map = <0xf0be16 0x1000000>;
+
+ memblock {
+ fdt = <0x80cc16 0x1000000>;
+ };
+ };
diff --git a/Documentation/core-api/kho/bindings/memblock/memblock.yaml b/Documentation/core-api/kho/bindings/memblock/memblock.yaml
new file mode 100644
index 000000000000..d388c28eb91d
--- /dev/null
+++ b/Documentation/core-api/kho/bindings/memblock/memblock.yaml
@@ -0,0 +1,39 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+title: Memblock reserved memory
+
+maintainers:
+ - Mike Rapoport <rppt@kernel.org>
+
+description: |
+ Memblock can serialize its current memory reservations created with
+ reserve_mem command line option across kexec through KHO.
+ The post-KHO kernel can then consume these reservations and they are
+ guaranteed to have the same physical address.
+
+properties:
+ compatible:
+ enum:
+ - reserve-mem-v1
+
+patternProperties:
+ "$[0-9a-f_]+^":
+ $ref: reserve-mem.yaml#
+ description: reserved memory regions
+
+required:
+ - compatible
+
+additionalProperties: false
+
+examples:
+ - |
+ memblock {
+ compatible = "memblock-v1";
+ n1 {
+ compatible = "reserve-mem-v1";
+ start = <0xc06b 0x4000000>;
+ size = <0x04 0x00>;
+ };
+ };
diff --git a/Documentation/core-api/kho/bindings/memblock/reserve-mem.yaml b/Documentation/core-api/kho/bindings/memblock/reserve-mem.yaml
new file mode 100644
index 000000000000..10282d3d1bcd
--- /dev/null
+++ b/Documentation/core-api/kho/bindings/memblock/reserve-mem.yaml
@@ -0,0 +1,40 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+title: Memblock reserved memory regions
+
+maintainers:
+ - Mike Rapoport <rppt@kernel.org>
+
+description: |
+ Memblock can serialize its current memory reservations created with
+ reserve_mem command line option across kexec through KHO.
+ This object describes each such region.
+
+properties:
+ compatible:
+ enum:
+ - reserve-mem-v1
+
+ start:
+ description: |
+ physical address (u64) of the reserved memory region.
+
+ size:
+ description: |
+ size (u64) of the reserved memory region.
+
+required:
+ - compatible
+ - start
+ - size
+
+additionalProperties: false
+
+examples:
+ - |
+ n1 {
+ compatible = "reserve-mem-v1";
+ start = <0xc06b 0x4000000>;
+ size = <0x04 0x00>;
+ };
diff --git a/Documentation/core-api/kho/bindings/sub-fdt.yaml b/Documentation/core-api/kho/bindings/sub-fdt.yaml
new file mode 100644
index 000000000000..b9a3d2d24850
--- /dev/null
+++ b/Documentation/core-api/kho/bindings/sub-fdt.yaml
@@ -0,0 +1,27 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+title: KHO users' FDT address
+
+maintainers:
+ - Mike Rapoport <rppt@kernel.org>
+ - Changyuan Lyu <changyuanl@google.com>
+
+description: |
+ Physical address of an FDT blob registered by a KHO user.
+
+properties:
+ fdt:
+ description: |
+ physical address (u64) of an FDT blob.
+
+required:
+ - fdt
+
+additionalProperties: false
+
+examples:
+ - |
+ memblock {
+ fdt = <0x80cc16 0x1000000>;
+ };
diff --git a/Documentation/core-api/kho/concepts.rst b/Documentation/core-api/kho/concepts.rst
new file mode 100644
index 000000000000..d626d1dbd678
--- /dev/null
+++ b/Documentation/core-api/kho/concepts.rst
@@ -0,0 +1,74 @@
+.. SPDX-License-Identifier: GPL-2.0-or-later
+.. _kho-concepts:
+
+=======================
+Kexec Handover Concepts
+=======================
+
+Kexec HandOver (KHO) is a mechanism that allows Linux to preserve memory
+regions, which could contain serialized system states, across kexec.
+
+It introduces multiple concepts:
+
+KHO FDT
+=======
+
+Every KHO kexec carries a KHO specific flattened device tree (FDT) blob
+that describes preserved memory regions. These regions contain either
+serialized subsystem states, or in-memory data that shall not be touched
+across kexec. After KHO, subsystems can retrieve and restore preserved
+memory regions from KHO FDT.
+
+KHO only uses the FDT container format and libfdt library, but does not
+adhere to the same property semantics that normal device trees do: Properties
+are passed in native endianness and standardized properties like ``regs`` and
+``ranges`` do not exist, hence there are no ``#...-cells`` properties.
+
+KHO is still under development. The FDT schema is unstable and would change
+in the future.
+
+Scratch Regions
+===============
+
+To boot into kexec, we need to have a physically contiguous memory range that
+contains no handed over memory. Kexec then places the target kernel and initrd
+into that region. The new kernel exclusively uses this region for memory
+allocations before during boot up to the initialization of the page allocator.
+
+We guarantee that we always have such regions through the scratch regions: On
+first boot KHO allocates several physically contiguous memory regions. Since
+after kexec these regions will be used by early memory allocations, there is a
+scratch region per NUMA node plus a scratch region to satisfy allocations
+requests that do not require particular NUMA node assignment.
+By default, size of the scratch region is calculated based on amount of memory
+allocated during boot. The ``kho_scratch`` kernel command line option may be
+used to explicitly define size of the scratch regions.
+The scratch regions are declared as CMA when page allocator is initialized so
+that their memory can be used during system lifetime. CMA gives us the
+guarantee that no handover pages land in that region, because handover pages
+must be at a static physical memory location and CMA enforces that only
+movable pages can be located inside.
+
+After KHO kexec, we ignore the ``kho_scratch`` kernel command line option and
+instead reuse the exact same region that was originally allocated. This allows
+us to recursively execute any amount of KHO kexecs. Because we used this region
+for boot memory allocations and as target memory for kexec blobs, some parts
+of that memory region may be reserved. These reservations are irrelevant for
+the next KHO, because kexec can overwrite even the original kernel.
+
+.. _kho-finalization-phase:
+
+KHO finalization phase
+======================
+
+To enable user space based kexec file loader, the kernel needs to be able to
+provide the FDT that describes the current kernel's state before
+performing the actual kexec. The process of generating that FDT is
+called serialization. When the FDT is generated, some properties
+of the system may become immutable because they are already written down
+in the FDT. That state is called the KHO finalization phase.
+
+Public API
+==========
+.. kernel-doc:: kernel/liveupdate/kexec_handover.c
+ :export:
diff --git a/Documentation/core-api/kho/fdt.rst b/Documentation/core-api/kho/fdt.rst
new file mode 100644
index 000000000000..62505285d60d
--- /dev/null
+++ b/Documentation/core-api/kho/fdt.rst
@@ -0,0 +1,80 @@
+.. SPDX-License-Identifier: GPL-2.0-or-later
+
+=======
+KHO FDT
+=======
+
+KHO uses the flattened device tree (FDT) container format and libfdt
+library to create and parse the data that is passed between the
+kernels. The properties in KHO FDT are stored in native format.
+It includes the physical address of an in-memory structure describing
+all preserved memory regions, as well as physical addresses of KHO users'
+own FDTs. Interpreting those sub FDTs is the responsibility of KHO users.
+
+KHO nodes and properties
+========================
+
+Property ``preserved-memory-map``
+---------------------------------
+
+KHO saves a special property named ``preserved-memory-map`` under the root node.
+This node contains the physical address of an in-memory structure for KHO to
+preserve memory regions across kexec.
+
+Property ``compatible``
+-----------------------
+
+The ``compatible`` property determines compatibility between the kernel
+that created the KHO FDT and the kernel that attempts to load it.
+If the kernel that loads the KHO FDT is not compatible with it, the entire
+KHO process will be bypassed.
+
+Property ``fdt``
+----------------
+
+Generally, a KHO user serialize its state into its own FDT and instructs
+KHO to preserve the underlying memory, such that after kexec, the new kernel
+can recover its state from the preserved FDT.
+
+A KHO user thus can create a node in KHO root tree and save the physical address
+of its own FDT in that node's property ``fdt`` .
+
+Examples
+========
+
+The following example demonstrates KHO FDT that preserves two memory
+regions created with ``reserve_mem`` kernel command line parameter::
+
+ /dts-v1/;
+
+ / {
+ compatible = "kho-v1";
+
+ preserved-memory-map = <0x40be16 0x1000000>;
+
+ memblock {
+ fdt = <0x1517 0x1000000>;
+ };
+ };
+
+where the ``memblock`` node contains an FDT that is requested by the
+subsystem memblock for preservation. The FDT contains the following
+serialized data::
+
+ /dts-v1/;
+
+ / {
+ compatible = "memblock-v1";
+
+ n1 {
+ compatible = "reserve-mem-v1";
+ start = <0xc06b 0x4000000>;
+ size = <0x04 0x00>;
+ };
+
+ n2 {
+ compatible = "reserve-mem-v1";
+ start = <0xc067 0x4000000>;
+ size = <0x04 0x00>;
+ };
+ };
diff --git a/Documentation/core-api/kho/index.rst b/Documentation/core-api/kho/index.rst
new file mode 100644
index 000000000000..0c63b0c5c143
--- /dev/null
+++ b/Documentation/core-api/kho/index.rst
@@ -0,0 +1,13 @@
+.. SPDX-License-Identifier: GPL-2.0-or-later
+
+========================
+Kexec Handover Subsystem
+========================
+
+.. toctree::
+ :maxdepth: 1
+
+ concepts
+ fdt
+
+.. only:: subproject and html
diff --git a/Documentation/core-api/kref.rst b/Documentation/core-api/kref.rst
index c61eea6f1bf2..8db9ff03d952 100644
--- a/Documentation/core-api/kref.rst
+++ b/Documentation/core-api/kref.rst
@@ -3,7 +3,7 @@ Adding reference counters (krefs) to kernel objects
===================================================
:Author: Corey Minyard <minyard@acm.org>
-:Author: Thomas Hellstrom <thellstrom@vmware.com>
+:Author: Thomas Hellström <thomas.hellstrom@linux.intel.com>
A lot of this was lifted from Greg Kroah-Hartman's 2004 OLS paper and
presentation on krefs, which can be found at:
@@ -321,3 +321,8 @@ rcu grace period after release_entry_rcu was called. That can be accomplished
by using kfree_rcu(entry, rhead) as done above, or by calling synchronize_rcu()
before using kfree, but note that synchronize_rcu() may sleep for a
substantial amount of time.
+
+Functions and structures
+========================
+
+.. kernel-doc:: include/linux/kref.h
diff --git a/Documentation/core-api/list.rst b/Documentation/core-api/list.rst
new file mode 100644
index 000000000000..86873ce9adbf
--- /dev/null
+++ b/Documentation/core-api/list.rst
@@ -0,0 +1,776 @@
+.. SPDX-License-Identifier: GPL-2.0+
+
+=====================
+Linked Lists in Linux
+=====================
+
+:Author: Nicolas Frattaroli <nicolas.frattaroli@collabora.com>
+
+.. contents::
+
+Introduction
+============
+
+Linked lists are one of the most basic data structures used in many programs.
+The Linux kernel implements several different flavours of linked lists. The
+purpose of this document is not to explain linked lists in general, but to show
+new kernel developers how to use the Linux kernel implementations of linked
+lists.
+
+Please note that while linked lists certainly are ubiquitous, they are rarely
+the best data structure to use in cases where a simple array doesn't already
+suffice. In particular, due to their poor data locality, linked lists are a bad
+choice in situations where performance may be of consideration. Familiarizing
+oneself with other in-kernel generic data structures, especially for concurrent
+accesses, is highly encouraged.
+
+Linux implementation of doubly linked lists
+===========================================
+
+Linux's linked list implementations can be used by including the header file
+``<linux/list.h>``.
+
+The doubly-linked list will likely be the most familiar to many readers. It's a
+list that can efficiently be traversed forwards and backwards.
+
+The Linux kernel's doubly-linked list is circular in nature. This means that to
+get from the head node to the tail, we can just travel one edge backwards.
+Similarly, to get from the tail node to the head, we can simply travel forwards
+"beyond" the tail and arrive back at the head.
+
+Declaring a node
+----------------
+
+A node in a doubly-linked list is declared by adding a struct list_head
+member to the data structure you wish to be contained in the list:
+
+.. code-block:: c
+
+ struct clown {
+ unsigned long long shoe_size;
+ const char *name;
+ struct list_head node; /* the aforementioned member */
+ };
+
+This may be an unfamiliar approach to some, as the classical explanation of a
+linked list is a list node data structure with pointers to the previous and next
+list node, as well the payload data. Linux chooses this approach because it
+allows for generic list modification code regardless of what data structure is
+contained within the list. Since the struct list_head member is not a pointer
+but part of the data structure proper, the container_of() pattern can be used by
+the list implementation to access the payload data regardless of its type, while
+staying oblivious to what said type actually is.
+
+Declaring and initializing a list
+---------------------------------
+
+A doubly-linked list can then be declared as just another struct list_head,
+and initialized with the LIST_HEAD_INIT() macro during initial assignment, or
+with the INIT_LIST_HEAD() function later:
+
+.. code-block:: c
+
+ struct clown_car {
+ int tyre_pressure[4];
+ struct list_head clowns; /* Looks like a node! */
+ };
+
+ /* ... Somewhere later in our driver ... */
+
+ static int circus_init(struct circus_priv *circus)
+ {
+ struct clown_car other_car = {
+ .tyre_pressure = {10, 12, 11, 9},
+ .clowns = LIST_HEAD_INIT(other_car.clowns)
+ };
+
+ INIT_LIST_HEAD(&circus->car.clowns);
+
+ return 0;
+ }
+
+A further point of confusion to some may be that the list itself doesn't really
+have its own type. The concept of the entire linked list and a
+struct list_head member that points to other entries in the list are one and
+the same.
+
+Adding nodes to the list
+------------------------
+
+Adding a node to the linked list is done through the list_add() macro.
+
+We'll return to our clown car example to illustrate how nodes get added to the
+list:
+
+.. code-block:: c
+
+ static int circus_fill_car(struct circus_priv *circus)
+ {
+ struct clown_car *car = &circus->car;
+ struct clown *grock;
+ struct clown *dimitri;
+
+ /* State 1 */
+
+ grock = kzalloc(sizeof(*grock), GFP_KERNEL);
+ if (!grock)
+ return -ENOMEM;
+ grock->name = "Grock";
+ grock->shoe_size = 1000;
+
+ /* Note that we're adding the "node" member */
+ list_add(&grock->node, &car->clowns);
+
+ /* State 2 */
+
+ dimitri = kzalloc(sizeof(*dimitri), GFP_KERNEL);
+ if (!dimitri)
+ return -ENOMEM;
+ dimitri->name = "Dimitri";
+ dimitri->shoe_size = 50;
+
+ list_add(&dimitri->node, &car->clowns);
+
+ /* State 3 */
+
+ return 0;
+ }
+
+In State 1, our list of clowns is still empty::
+
+ .------.
+ v |
+ .--------. |
+ | clowns |--'
+ '--------'
+
+This diagram shows the singular "clowns" node pointing at itself. In this
+diagram, and all following diagrams, only the forward edges are shown, to aid in
+clarity.
+
+In State 2, we've added Grock after the list head::
+
+ .--------------------.
+ v |
+ .--------. .-------. |
+ | clowns |---->| Grock |--'
+ '--------' '-------'
+
+This diagram shows the "clowns" node pointing at a new node labeled "Grock".
+The Grock node is pointing back at the "clowns" node.
+
+In State 3, we've added Dimitri after the list head, resulting in the following::
+
+ .------------------------------------.
+ v |
+ .--------. .---------. .-------. |
+ | clowns |---->| Dimitri |---->| Grock |--'
+ '--------' '---------' '-------'
+
+This diagram shows the "clowns" node pointing at a new node labeled "Dimitri",
+which then points at the node labeled "Grock". The "Grock" node still points
+back at the "clowns" node.
+
+If we wanted to have Dimitri inserted at the end of the list instead, we'd use
+list_add_tail(). Our code would then look like this:
+
+.. code-block:: c
+
+ static int circus_fill_car(struct circus_priv *circus)
+ {
+ /* ... */
+
+ list_add_tail(&dimitri->node, &car->clowns);
+
+ /* State 3b */
+
+ return 0;
+ }
+
+This results in the following list::
+
+ .------------------------------------.
+ v |
+ .--------. .-------. .---------. |
+ | clowns |---->| Grock |---->| Dimitri |--'
+ '--------' '-------' '---------'
+
+This diagram shows the "clowns" node pointing at the node labeled "Grock",
+which points at the new node labeled "Dimitri". The node labeled "Dimitri"
+points back at the "clowns" node.
+
+Traversing the list
+-------------------
+
+To iterate the list, we can loop through all nodes within the list with
+list_for_each().
+
+In our clown example, this results in the following somewhat awkward code:
+
+.. code-block:: c
+
+ static unsigned long long circus_get_max_shoe_size(struct circus_priv *circus)
+ {
+ unsigned long long res = 0;
+ struct clown *e;
+ struct list_head *cur;
+
+ list_for_each(cur, &circus->car.clowns) {
+ e = list_entry(cur, struct clown, node);
+ if (e->shoe_size > res)
+ res = e->shoe_size;
+ }
+
+ return res;
+ }
+
+The list_entry() macro internally uses the aforementioned container_of() to
+retrieve the data structure instance that ``node`` is a member of.
+
+Note how the additional list_entry() call is a little awkward here. It's only
+there because we're iterating through the ``node`` members, but we really want
+to iterate through the payload, i.e. the ``struct clown`` that contains each
+node's struct list_head. For this reason, there is a second macro:
+list_for_each_entry()
+
+Using it would change our code to something like this:
+
+.. code-block:: c
+
+ static unsigned long long circus_get_max_shoe_size(struct circus_priv *circus)
+ {
+ unsigned long long res = 0;
+ struct clown *e;
+
+ list_for_each_entry(e, &circus->car.clowns, node) {
+ if (e->shoe_size > res)
+ res = e->shoe_size;
+ }
+
+ return res;
+ }
+
+This eliminates the need for the list_entry() step, and our loop cursor is now
+of the type of our payload. The macro is given the member name that corresponds
+to the list's struct list_head within the clown data structure so that it can
+still walk the list.
+
+Removing nodes from the list
+----------------------------
+
+The list_del() function can be used to remove entries from the list. It not only
+removes the given entry from the list, but poisons the entry's ``prev`` and
+``next`` pointers, so that unintended use of the entry after removal does not
+go unnoticed.
+
+We can extend our previous example to remove one of the entries:
+
+.. code-block:: c
+
+ static int circus_fill_car(struct circus_priv *circus)
+ {
+ /* ... */
+
+ list_add(&dimitri->node, &car->clowns);
+
+ /* State 3 */
+
+ list_del(&dimitri->node);
+
+ /* State 4 */
+
+ return 0;
+ }
+
+The result of this would be this::
+
+ .--------------------.
+ v |
+ .--------. .-------. | .---------.
+ | clowns |---->| Grock |--' | Dimitri |
+ '--------' '-------' '---------'
+
+This diagram shows the "clowns" node pointing at the node labeled "Grock",
+which points back at the "clowns" node. Off to the side is a lone node labeled
+"Dimitri", which has no arrows pointing anywhere.
+
+Note how the Dimitri node does not point to itself; its pointers are
+intentionally set to a "poison" value that the list code refuses to traverse.
+
+If we wanted to reinitialize the removed node instead to make it point at itself
+again like an empty list head, we can use list_del_init() instead:
+
+.. code-block:: c
+
+ static int circus_fill_car(struct circus_priv *circus)
+ {
+ /* ... */
+
+ list_add(&dimitri->node, &car->clowns);
+
+ /* State 3 */
+
+ list_del_init(&dimitri->node);
+
+ /* State 4b */
+
+ return 0;
+ }
+
+This results in the deleted node pointing to itself again::
+
+ .--------------------. .-------.
+ v | v |
+ .--------. .-------. | .---------. |
+ | clowns |---->| Grock |--' | Dimitri |--'
+ '--------' '-------' '---------'
+
+This diagram shows the "clowns" node pointing at the node labeled "Grock",
+which points back at the "clowns" node. Off to the side is a lone node labeled
+"Dimitri", which points to itself.
+
+Traversing whilst removing nodes
+--------------------------------
+
+Deleting entries while we're traversing the list will cause problems if we use
+list_for_each() and list_for_each_entry(), as deleting the current entry would
+modify the ``next`` pointer of it, which means the traversal can't properly
+advance to the next list entry.
+
+There is a solution to this however: list_for_each_safe() and
+list_for_each_entry_safe(). These take an additional parameter of a pointer to
+a struct list_head to use as temporary storage for the next entry during
+iteration, solving the issue.
+
+An example of how to use it:
+
+.. code-block:: c
+
+ static void circus_eject_insufficient_clowns(struct circus_priv *circus)
+ {
+ struct clown *e;
+ struct clown *n; /* temporary storage for safe iteration */
+
+ list_for_each_entry_safe(e, n, &circus->car.clowns, node) {
+ if (e->shoe_size < 500)
+ list_del(&e->node);
+ }
+ }
+
+Proper memory management (i.e. freeing the deleted node while making sure
+nothing still references it) in this case is left as an exercise to the reader.
+
+Cutting a list
+--------------
+
+There are two helper functions to cut lists with. Both take elements from the
+list ``head``, and replace the contents of the list ``list``.
+
+The first such function is list_cut_position(). It removes all list entries from
+``head`` up to and including ``entry``, placing them in ``list`` instead.
+
+In this example, it's assumed we start with the following list::
+
+ .----------------------------------------------------------------.
+ v |
+ .--------. .-------. .---------. .-----. .---------. |
+ | clowns |---->| Grock |---->| Dimitri |---->| Pic |---->| Alfredo |--'
+ '--------' '-------' '---------' '-----' '---------'
+
+With the following code, every clown up to and including "Pic" is moved from
+the "clowns" list head to a separate struct list_head initialized at local
+stack variable ``retirement``:
+
+.. code-block:: c
+
+ static void circus_retire_clowns(struct circus_priv *circus)
+ {
+ struct list_head retirement = LIST_HEAD_INIT(retirement);
+ struct clown *grock, *dimitri, *pic, *alfredo;
+ struct clown_car *car = &circus->car;
+
+ /* ... clown initialization, list adding ... */
+
+ list_cut_position(&retirement, &car->clowns, &pic->node);
+
+ /* State 1 */
+ }
+
+The resulting ``car->clowns`` list would be this::
+
+ .----------------------.
+ v |
+ .--------. .---------. |
+ | clowns |---->| Alfredo |--'
+ '--------' '---------'
+
+Meanwhile, the ``retirement`` list is transformed to the following::
+
+ .--------------------------------------------------.
+ v |
+ .------------. .-------. .---------. .-----. |
+ | retirement |---->| Grock |---->| Dimitri |---->| Pic |--'
+ '------------' '-------' '---------' '-----'
+
+The second function, list_cut_before(), is much the same, except it cuts before
+the ``entry`` node, i.e. it removes all list entries from ``head`` up to but
+excluding ``entry``, placing them in ``list`` instead. This example assumes the
+same initial starting list as the previous example:
+
+.. code-block:: c
+
+ static void circus_retire_clowns(struct circus_priv *circus)
+ {
+ struct list_head retirement = LIST_HEAD_INIT(retirement);
+ struct clown *grock, *dimitri, *pic, *alfredo;
+ struct clown_car *car = &circus->car;
+
+ /* ... clown initialization, list adding ... */
+
+ list_cut_before(&retirement, &car->clowns, &pic->node);
+
+ /* State 1b */
+ }
+
+The resulting ``car->clowns`` list would be this::
+
+ .----------------------------------.
+ v |
+ .--------. .-----. .---------. |
+ | clowns |---->| Pic |---->| Alfredo |--'
+ '--------' '-----' '---------'
+
+Meanwhile, the ``retirement`` list is transformed to the following::
+
+ .--------------------------------------.
+ v |
+ .------------. .-------. .---------. |
+ | retirement |---->| Grock |---->| Dimitri |--'
+ '------------' '-------' '---------'
+
+It should be noted that both functions will destroy links to any existing nodes
+in the destination ``struct list_head *list``.
+
+Moving entries and partial lists
+--------------------------------
+
+The list_move() and list_move_tail() functions can be used to move an entry
+from one list to another, to either the start or end respectively.
+
+In the following example, we'll assume we start with two lists ("clowns" and
+"sidewalk" in the following initial state "State 0"::
+
+ .----------------------------------------------------------------.
+ v |
+ .--------. .-------. .---------. .-----. .---------. |
+ | clowns |---->| Grock |---->| Dimitri |---->| Pic |---->| Alfredo |--'
+ '--------' '-------' '---------' '-----' '---------'
+
+ .-------------------.
+ v |
+ .----------. .-----. |
+ | sidewalk |---->| Pio |--'
+ '----------' '-----'
+
+We apply the following example code to the two lists:
+
+.. code-block:: c
+
+ static void circus_clowns_exit_car(struct circus_priv *circus)
+ {
+ struct list_head sidewalk = LIST_HEAD_INIT(sidewalk);
+ struct clown *grock, *dimitri, *pic, *alfredo, *pio;
+ struct clown_car *car = &circus->car;
+
+ /* ... clown initialization, list adding ... */
+
+ /* State 0 */
+
+ list_move(&pic->node, &sidewalk);
+
+ /* State 1 */
+
+ list_move_tail(&dimitri->node, &sidewalk);
+
+ /* State 2 */
+ }
+
+In State 1, we arrive at the following situation::
+
+ .-----------------------------------------------------.
+ | |
+ v |
+ .--------. .-------. .---------. .---------. |
+ | clowns |---->| Grock |---->| Dimitri |---->| Alfredo |--'
+ '--------' '-------' '---------' '---------'
+
+ .-------------------------------.
+ v |
+ .----------. .-----. .-----. |
+ | sidewalk |---->| Pic |---->| Pio |--'
+ '----------' '-----' '-----'
+
+In State 2, after we've moved Dimitri to the tail of sidewalk, the situation
+changes as follows::
+
+ .-------------------------------------.
+ | |
+ v |
+ .--------. .-------. .---------. |
+ | clowns |---->| Grock |---->| Alfredo |--'
+ '--------' '-------' '---------'
+
+ .-----------------------------------------------.
+ v |
+ .----------. .-----. .-----. .---------. |
+ | sidewalk |---->| Pic |---->| Pio |---->| Dimitri |--'
+ '----------' '-----' '-----' '---------'
+
+As long as the source and destination list head are part of the same list, we
+can also efficiently bulk move a segment of the list to the tail end of the
+list. We continue the previous example by adding a list_bulk_move_tail() after
+State 2, moving Pic and Pio to the tail end of the sidewalk list.
+
+.. code-block:: c
+
+ static void circus_clowns_exit_car(struct circus_priv *circus)
+ {
+ struct list_head sidewalk = LIST_HEAD_INIT(sidewalk);
+ struct clown *grock, *dimitri, *pic, *alfredo, *pio;
+ struct clown_car *car = &circus->car;
+
+ /* ... clown initialization, list adding ... */
+
+ /* State 0 */
+
+ list_move(&pic->node, &sidewalk);
+
+ /* State 1 */
+
+ list_move_tail(&dimitri->node, &sidewalk);
+
+ /* State 2 */
+
+ list_bulk_move_tail(&sidewalk, &pic->node, &pio->node);
+
+ /* State 3 */
+ }
+
+For the sake of brevity, only the altered "sidewalk" list at State 3 is depicted
+in the following diagram::
+
+ .-----------------------------------------------.
+ v |
+ .----------. .---------. .-----. .-----. |
+ | sidewalk |---->| Dimitri |---->| Pic |---->| Pio |--'
+ '----------' '---------' '-----' '-----'
+
+Do note that list_bulk_move_tail() does not do any checking as to whether all
+three supplied ``struct list_head *`` parameters really do belong to the same
+list. If you use it outside the constraints the documentation gives, then the
+result is a matter between you and the implementation.
+
+Rotating entries
+----------------
+
+A common write operation on lists, especially when using them as queues, is
+to rotate it. A list rotation means entries at the front are sent to the back.
+
+For rotation, Linux provides us with two functions: list_rotate_left() and
+list_rotate_to_front(). The former can be pictured like a bicycle chain, taking
+the entry after the supplied ``struct list_head *`` and moving it to the tail,
+which in essence means the entire list, due to its circular nature, rotates by
+one position.
+
+The latter, list_rotate_to_front(), takes the same concept one step further:
+instead of advancing the list by one entry, it advances it *until* the specified
+entry is the new front.
+
+In the following example, our starting state, State 0, is the following::
+
+ .-----------------------------------------------------------------.
+ v |
+ .--------. .-------. .---------. .-----. .---------. .-----. |
+ | clowns |-->| Grock |-->| Dimitri |-->| Pic |-->| Alfredo |-->| Pio |-'
+ '--------' '-------' '---------' '-----' '---------' '-----'
+
+The example code being used to demonstrate list rotations is the following: