]> xenbits.xensource.com Git - libvirt.git/commitdiff
docs: Prohibit 'external' links within the webpage
authorPeter Krempa <pkrempa@redhat.com>
Tue, 8 Oct 2024 13:06:17 +0000 (15:06 +0200)
committerPeter Krempa <pkrempa@redhat.com>
Wed, 9 Oct 2024 14:00:44 +0000 (16:00 +0200)
Enforce that relative links are used within the page, so that local
installations don't require internet conection and/or don't redirect to
the web needlessly.

This is done by looking for any local link (barring exceptions) when
checking links with 'check-html-references.py'.

Signed-off-by: Peter Krempa <pkrempa@redhat.com>
Reviewed-by: Ján Tomko <jtomko@redhat.com>
docs/meson.build
scripts/check-html-references.py

index a94f481730686a84293419e06e99479e863a3f6c..d7343b666577ce75ba316ebf6e0a69510e845804 100644 (file)
@@ -359,6 +359,9 @@ if tests_enabled[0]
     args: [
       check_html_references_prog.full_path(),
       '--require-https',
+      '--project-uri', 'https://libvirt.org',
+      '--project-uri-exceptions', 'docs/manpages/',
+      '--project-uri-exceptions', 'docs/html/',
       '--webroot',
       meson.project_build_root() / 'docs'
     ],
index 3382d838c5efea35b82a066b12e9c49e90f3a8cf..74299a1958a1ac939985e573e66d38635b2c347e 100755 (executable)
@@ -53,7 +53,7 @@ def get_file_list(prefix):
 
 
 # loads an XHTML and extracts all anchors, local and remote links for the one file
-def process_file(filename):
+def process_file(filename, project_uri):
     tree = ET.parse(filename)
     root = tree.getroot()
     docname = root.get('data-sourcedoc')
@@ -65,6 +65,7 @@ def process_file(filename):
     anchors = [filename]
     targets = []
     images = []
+    projectlinks = []
 
     for elem in root.findall('.//html:a', ns):
         target = elem.get('href')
@@ -76,6 +77,10 @@ def process_file(filename):
         if target:
             if re.search('://', target):
                 externallinks.append(target)
+
+                if project_uri is not None and target.startswith(project_uri):
+                    projectlinks.append((target, docname))
+
             elif target[0] != '#' and 'mailto:' not in target:
                 targetfull = os.path.normpath(os.path.join(dirname, target))
 
@@ -106,22 +111,24 @@ def process_file(filename):
                 imagefull = os.path.normpath(os.path.join(dirname, src))
                 images.append((imagefull, docname))
 
-    return (anchors, targets, images)
+    return (anchors, targets, images, projectlinks)
 
 
-def process_all(filelist):
+def process_all(filelist, project_uri):
     anchors = []
     targets = []
     images = []
+    projectlinks = []
 
     for file in filelist:
-        anchor, target, image = process_file(file)
+        anchor, target, image, projectlink = process_file(file, project_uri)
 
         targets = targets + target
         anchors = anchors + anchor
         images = images + image
+        projectlinks = projectlinks + projectlink
 
-    return (targets, anchors, images)
+    return (targets, anchors, images, projectlinks)
 
 
 def check_targets(targets, anchors):
@@ -236,6 +243,26 @@ def check_https(links):
     return fail
 
 
+# checks prohibited external links to local files
+def check_projectlinks(projectlinks, exceptions):
+    fail = False
+
+    for (link, filename) in projectlinks:
+        allowed = False
+
+        if exceptions is not None:
+            for exc in exceptions:
+                if exc in filename:
+                    allowed = True
+                    break
+
+        if not allowed:
+            print(f'ERROR: prohibited external URI \'{link}\' to local project in \'{filename}\'')
+            fail = True
+
+    return fail
+
+
 parser = argparse.ArgumentParser(description='HTML reference checker')
 parser.add_argument('--webroot', required=True,
                     help='path to the web root')
@@ -247,6 +274,10 @@ parser.add_argument('--ignore-images', action='append',
                     help='paths to images that should be considered as used')
 parser.add_argument('--require-https', action="store_true",
                     help='require secure https for external links')
+parser.add_argument('--project-uri',
+                    help='external prefix of the local project (e.g. https://libvirt.org; external links with that prefix are prohibited')
+parser.add_argument('--project-uri-exceptions', action='append',
+                    help='list of path prefixes excluded from the "--project-uri" checks')
 
 args = parser.parse_args()
 
@@ -254,7 +285,7 @@ files, imagefiles = get_file_list(os.path.abspath(args.webroot))
 
 entrypoint = os.path.join(os.path.abspath(args.webroot), args.entrypoint)
 
-targets, anchors, usedimages = process_all(files)
+targets, anchors, usedimages, projectlinks = process_all(files, args.project_uri)
 
 fail = False
 
@@ -283,6 +314,9 @@ else:
     if check_images(usedimages, imagefiles, args.ignore_images):
         fail = True
 
+    if check_projectlinks(projectlinks, args.project_uri_exceptions):
+        fail = True
+
     if args.require_https:
         if check_https(externallinks):
             fail = True