summaryrefslogtreecommitdiff
path: root/scripts
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-06-03 10:34:34 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2022-06-03 10:34:34 -0700
commit04d93b2b8bc7a68ec45a6a156f34a611ede5aa60 (patch)
treecea9f0b45aa209ec9a2cb8227d2542bf43e6d835 /scripts
parent78c6499c92090d0fd1ddd1684fc3a5dc41d98c92 (diff)
parent53c83d6d8e399fad3d3d25df0ea0d54eb0f94f88 (diff)
Merge tag 'spdx-5.19-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/spdx
Pull SPDX updates from Greg KH: "Here are some SPDX license marker changes. The SPDX-labeling effort has started to pick up again, so here are some changes for various parts of the tree that are related to this effort. Included in here are: - freevxfs license updates - spihash.c license cleanups - spdxcheck script updates to make things easier to work with going forward All of the license updates came from the original authors/copyright holders of the code involved. All of these have been in linux-next for weeks with no reported issues" * tag 'spdx-5.19-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/spdx: siphash: add SPDX tags as sole licensing authority scripts/spdxcheck: Exclude top-level README scripts/spdxcheck: Exclude MAINTAINERS/CREDITS scripts/spdxcheck: Exclude config directories scripts/spdxcheck: Put excluded files and directories into a separate file scripts/spdxcheck: Add option to display files without SPDX scripts/spdxcheck: Add [sub]directory statistics scripts/spdxcheck: Add directory statistics scripts/spdxcheck: Add percentage to statistics freevxfs: relicense to GPLv2 only
Diffstat (limited to 'scripts')
-rwxr-xr-xscripts/spdxcheck.py175
-rw-r--r--scripts/spdxexclude18
2 files changed, 181 insertions, 12 deletions
diff --git a/scripts/spdxcheck.py b/scripts/spdxcheck.py
index f3be8ed54f6d..18cb9f5b3d3d 100755
--- a/scripts/spdxcheck.py
+++ b/scripts/spdxcheck.py
@@ -6,6 +6,7 @@ from argparse import ArgumentParser
from ply import lex, yacc
import locale
import traceback
+import fnmatch
import sys
import git
import re
@@ -28,6 +29,21 @@ class SPDXdata(object):
self.licenses = [ ]
self.exceptions = { }
+class dirinfo(object):
+ def __init__(self):
+ self.missing = 0
+ self.total = 0
+ self.files = []
+
+ def update(self, fname, basedir, miss):
+ self.total += 1
+ self.missing += miss
+ if miss:
+ fname = './' + fname
+ bdir = os.path.dirname(fname)
+ if bdir == basedir.rstrip('/'):
+ self.files.append(fname)
+
# Read the spdx data from the LICENSES directory
def read_spdxdata(repo):
@@ -91,11 +107,25 @@ class id_parser(object):
self.parser = yacc.yacc(module = self, write_tables = False, debug = False)
self.lines_checked = 0
self.checked = 0
+ self.excluded = 0
self.spdx_valid = 0
self.spdx_errors = 0
+ self.spdx_dirs = {}
+ self.dirdepth = -1
+ self.basedir = '.'
self.curline = 0
self.deepest = 0
+ def set_dirinfo(self, basedir, dirdepth):
+ if dirdepth >= 0:
+ self.basedir = basedir
+ bdir = basedir.lstrip('./').rstrip('/')
+ if bdir != '':
+ parts = bdir.split('/')
+ else:
+ parts = []
+ self.dirdepth = dirdepth + len(parts)
+
# Validate License and Exception IDs
def validate(self, tok):
id = tok.value.upper()
@@ -167,6 +197,7 @@ class id_parser(object):
def parse_lines(self, fd, maxlines, fname):
self.checked += 1
self.curline = 0
+ fail = 1
try:
for line in fd:
line = line.decode(locale.getpreferredencoding(False), errors='ignore')
@@ -192,6 +223,7 @@ class id_parser(object):
# Should we check for more SPDX ids in the same file and
# complain if there are any?
#
+ fail = 0
break
except ParserException as pe:
@@ -203,28 +235,102 @@ class id_parser(object):
sys.stdout.write('%s: %d:0 %s\n' %(fname, self.curline, pe.txt))
self.spdx_errors += 1
-def scan_git_tree(tree):
+ if fname == '-':
+ return
+
+ base = os.path.dirname(fname)
+ if self.dirdepth > 0:
+ parts = base.split('/')
+ i = 0
+ base = '.'
+ while i < self.dirdepth and i < len(parts) and len(parts[i]):
+ base += '/' + parts[i]
+ i += 1
+ elif self.dirdepth == 0:
+ base = self.basedir
+ else:
+ base = './' + base.rstrip('/')
+ base += '/'
+
+ di = self.spdx_dirs.get(base, dirinfo())
+ di.update(fname, base, fail)
+ self.spdx_dirs[base] = di
+
+class pattern(object):
+ def __init__(self, line):
+ self.pattern = line
+ self.match = self.match_file
+ if line == '.*':
+ self.match = self.match_dot
+ elif line.endswith('/'):
+ self.pattern = line[:-1]
+ self.match = self.match_dir
+ elif line.startswith('/'):
+ self.pattern = line[1:]
+ self.match = self.match_fn
+
+ def match_dot(self, fpath):
+ return os.path.basename(fpath).startswith('.')
+
+ def match_file(self, fpath):
+ return os.path.basename(fpath) == self.pattern
+
+ def match_fn(self, fpath):
+ return fnmatch.fnmatchcase(fpath, self.pattern)
+
+ def match_dir(self, fpath):
+ if self.match_fn(os.path.dirname(fpath)):
+ return True
+ return fpath.startswith(self.pattern)
+
+def exclude_file(fpath):
+ for rule in exclude_rules:
+ if rule.match(fpath):
+ return True
+ return False
+
+def scan_git_tree(tree, basedir, dirdepth):
+ parser.set_dirinfo(basedir, dirdepth)
for el in tree.traverse():
- # Exclude stuff which would make pointless noise
- # FIXME: Put this somewhere more sensible
- if el.path.startswith("LICENSES"):
- continue
- if el.path.find("license-rules.rst") >= 0:
- continue
if not os.path.isfile(el.path):
continue
+ if exclude_file(el.path):
+ parser.excluded += 1
+ continue
with open(el.path, 'rb') as fd:
parser.parse_lines(fd, args.maxlines, el.path)
-def scan_git_subtree(tree, path):
+def scan_git_subtree(tree, path, dirdepth):
for p in path.strip('/').split('/'):
tree = tree[p]
- scan_git_tree(tree)
+ scan_git_tree(tree, path.strip('/'), dirdepth)
+
+def read_exclude_file(fname):
+ rules = []
+ if not fname:
+ return rules
+ with open(fname) as fd:
+ for line in fd:
+ line = line.strip()
+ if line.startswith('#'):
+ continue
+ if not len(line):
+ continue
+ rules.append(pattern(line))
+ return rules
if __name__ == '__main__':
ap = ArgumentParser(description='SPDX expression checker')
ap.add_argument('path', nargs='*', help='Check path or file. If not given full git tree scan. For stdin use "-"')
+ ap.add_argument('-d', '--dirs', action='store_true',
+ help='Show [sub]directory statistics.')
+ ap.add_argument('-D', '--depth', type=int, default=-1,
+ help='Directory depth for -d statistics. Default: unlimited')
+ ap.add_argument('-e', '--exclude',
+ help='File containing file patterns to exclude. Default: scripts/spdxexclude')
+ ap.add_argument('-f', '--files', action='store_true',
+ help='Show files without SPDX.')
ap.add_argument('-m', '--maxlines', type=int, default=15,
help='Maximum number of lines to scan in a file. Default 15')
ap.add_argument('-v', '--verbose', action='store_true', help='Verbose statistics output')
@@ -259,6 +365,15 @@ if __name__ == '__main__':
sys.exit(1)
try:
+ fname = args.exclude
+ if not fname:
+ fname = os.path.join(os.path.dirname(__file__), 'spdxexclude')
+ exclude_rules = read_exclude_file(fname)
+ except Exception as ex:
+ sys.stderr.write('FAIL: Reading exclude file %s: %s\n' %(fname, ex))
+ sys.exit(1)
+
+ try:
if len(args.path) and args.path[0] == '-':
stdin = os.fdopen(sys.stdin.fileno(), 'rb')
parser.parse_lines(stdin, args.maxlines, '-')
@@ -268,13 +383,21 @@ if __name__ == '__main__':
if os.path.isfile(p):
parser.parse_lines(open(p, 'rb'), args.maxlines, p)
elif os.path.isdir(p):
- scan_git_subtree(repo.head.reference.commit.tree, p)
+ scan_git_subtree(repo.head.reference.commit.tree, p,
+ args.depth)
else:
sys.stderr.write('path %s does not exist\n' %p)
sys.exit(1)
else:
# Full git tree scan
- scan_git_tree(repo.head.commit.tree)
+ scan_git_tree(repo.head.commit.tree, '.', args.depth)
+
+ ndirs = len(parser.spdx_dirs)
+ dirsok = 0
+ if ndirs:
+ for di in parser.spdx_dirs.values():
+ if not di.missing:
+ dirsok += 1
if args.verbose:
sys.stderr.write('\n')
@@ -283,10 +406,38 @@ if __name__ == '__main__':
sys.stderr.write('License IDs %12d\n' %len(spdx.licenses))
sys.stderr.write('Exception IDs %12d\n' %len(spdx.exceptions))
sys.stderr.write('\n')
+ sys.stderr.write('Files excluded: %12d\n' %parser.excluded)
sys.stderr.write('Files checked: %12d\n' %parser.checked)
sys.stderr.write('Lines checked: %12d\n' %parser.lines_checked)
- sys.stderr.write('Files with SPDX: %12d\n' %parser.spdx_valid)
+ if parser.checked:
+ pc = int(100 * parser.spdx_valid / parser.checked)
+ sys.stderr.write('Files with SPDX: %12d %3d%%\n' %(parser.spdx_valid, pc))
sys.stderr.write('Files with errors: %12d\n' %parser.spdx_errors)
+ if ndirs:
+ sys.stderr.write('\n')
+ sys.stderr.write('Directories accounted: %8d\n' %ndirs)
+ pc = int(100 * dirsok / ndirs)
+ sys.stderr.write('Directories complete: %8d %3d%%\n' %(dirsok, pc))
+
+ if ndirs and ndirs != dirsok and args.dirs:
+ if args.verbose:
+ sys.stderr.write('\n')
+ sys.stderr.write('Incomplete directories: SPDX in Files\n')
+ for f in sorted(parser.spdx_dirs.keys()):
+ di = parser.spdx_dirs[f]
+ if di.missing:
+ valid = di.total - di.missing
+ pc = int(100 * valid / di.total)
+ sys.stderr.write(' %-80s: %5d of %5d %3d%%\n' %(f, valid, di.total, pc))
+
+ if ndirs and ndirs != dirsok and args.files:
+ if args.verbose or args.dirs:
+ sys.stderr.write('\n')
+ sys.stderr.write('Files without SPDX:\n')
+ for f in sorted(parser.spdx_dirs.keys()):
+ di = parser.spdx_dirs[f]
+ for f in sorted(di.files):
+ sys.stderr.write(' %s\n' %f)
sys.exit(0)
diff --git a/scripts/spdxexclude b/scripts/spdxexclude
new file mode 100644
index 000000000000..81bdb13ed789
--- /dev/null
+++ b/scripts/spdxexclude
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Patterns for excluding files and directories
+
+# Ignore the license directory and the licensing documentation which would
+# create lots of noise for no value
+LICENSES/
+license-rules.rst
+
+# Ignore config files and snippets. The majority is generated
+# by the Kconfig tools
+kernel/configs/
+arch/*/configs/
+
+# Other files without copyrightable content
+/CREDITS
+/MAINTAINERS
+/README