0
0
Fork 0
mirror of https://github.com/netdata/netdata.git synced 2025-04-25 13:33:48 +00:00
netdata_netdata/docs/generator/checklinks.sh
Paweł Krupa 5ed8653246
Rename doc to docs and move htmldoc ()
* rename doc dir; move htmldoc under docs/generator

* run shellcheck and shfmt on docs generator
2018-11-25 20:11:50 +02:00

394 lines
9.4 KiB
Bash
Executable file

#!/bin/bash
# shellcheck disable=SC2181
# (SC2181 is silenced file-wide because this script deliberately inspects "$?"
#  after running a command instead of testing the command directly.)
#
# Doc link checker
# Validates and tries to fix all links that will cause issues either in the repo, or in the html site
#
# Run without options (or with an unknown one) to get the usage text printed
# by printhelp. The script exits non-zero when any checked link was reported
# as an error (see EXITCODE).

# Directory of the docs generator. It is pruned from the recursive scan and
# "$GENERATOR_DIR/src" is the tree whose files get rewritten when relative
# links are fixed. The path is relative to the directory the script is run
# from (presumably the repository root -- confirm against the build scripts).
GENERATOR_DIR="docs/generator"
#######################################
# Print a debugging message when verbose mode is enabled.
# Globals:   VERBOSE (read) - 1 enables output; unset/empty counts as 0
# Arguments: $1 - message to print
# Outputs:   the message plus a newline on stdout (only when VERBOSE is 1)
# Returns:   0
#######################################
dbg () {
  # Default to 0 so that calling dbg before VERBOSE is initialised does not
  # make "[" complain about a non-integer operand.
  if [ "${VERBOSE:-0}" -eq 1 ] ; then
    printf "%s\\n" "${1}"
  fi
}
#######################################
# Show the command line usage of the link checker.
# Arguments: none
# Outputs:   the usage text, followed by one empty line, on stdout
#######################################
printhelp () {
  # Quoted delimiter: the text is emitted literally, nothing is expanded.
  cat <<'USAGE'
Usage: docs/generator/checklinks.sh [-r OR -f <fname>] [OPTIONS]
-r Recursively check all mds in all child directories, except docs/generator and node_modules (which is generatord by netlify)
-f Just check the passed md file
General Options:
-x Execute commands. By default the script runs in test mode with no files changed by the script (results and fixes are just shown). Use -x to have it apply the changes.
-u trys to follow URLs using curl
-v Outputs debugging messages
By default, nothing is actually checked. The following options tell it what to check:
-a Check all link types
-w Check wiki links (and just warn if you see one)
-b Check absolute links to the netdata repo (and change them to relative). Only checks links to https://github.com/netdata/netdata/????/master*
-l Check relative links to the netdata repo (and replace them with links that the html static site can live with, under docs/generator/src only)
-e Check external links, outside the wiki or the repo (useless without adding the -u option, to verify that they're not broken)

USAGE
}
#######################################
# Apply a fix, or only show it when running in test (dry-run) mode.
# Globals:   EXECUTE (read) - non-zero integer: run the command;
#                             0, unset or not a number: only print it
# Arguments: $1 - shell command line implementing the fix
# Outputs:   "-- SHOULD EXECUTE: <cmd>" on stdout in dry-run mode
# Returns:   status of the executed command, or 0 in dry-run mode
#
# NOTE(review): $1 is passed to eval. Callers assemble it from file names and
# link text found in the markdown files, so that input must be trusted or
# escaped by the caller.
#######################################
fix () {
  # Fail closed: files are only modified when EXECUTE is a real non-zero
  # number. If the test itself errors out (EXECUTE is garbage), "[" returns 2
  # and we land in the dry-run branch instead of running the command.
  if [ "${EXECUTE:-0}" -ne 0 ] ; then
    dbg "-- EXECUTING: $1"
    eval "$1"
  else
    echo "-- SHOULD EXECUTE: $1"
  fi
}
#######################################
# Check an absolute link to the netdata GitHub repo and propose (or apply,
# through fix) its replacement by a link relative to the containing file.
# Globals:   EXITCODE (set to 1 on any reported error)
# Arguments: $1 - markdown file that contains the link
#            $2 - the absolute link (https://github.com/netdata/netdata/????/master/...)
#            $3 - the link exactly as written in the file (used as sed search text)
# Outputs:   "-- ERROR: ..." lines on stdout; the sed command via fix
# Requires:  realpath with --relative-to support (GNU coreutils)
#######################################
ck_netdata_absolute () {
  local f=$1 alnk=$2 lnkinfile=$3
  # fpath defaults to "." so files given without a directory part still work.
  local fpath="." rlnk abspath rest relativelink srch rplc target

  # Act on the result of testURL immediately: any command run in between
  # would overwrite "$?" and hide a broken link.
  if ! testURL "$alnk" ; then
    echo "-- ERROR: $f - $alnk is a broken link"
    EXITCODE=1
    return
  fi

  if [[ $f =~ ^(.*)/([^/]*)$ ]] ; then
    fpath="${BASH_REMATCH[1]}"
    dbg "-- Current file is at $fpath"
  fi

  # Strip the "https://github.com/netdata/netdata/<4 chars>/master/" prefix,
  # which leaves a path relative to the repository root.
  rlnk=$(printf '%s\n' "$alnk" | sed 's/https:\/\/github.com\/netdata\/netdata\/....\/master\///g')
  case $rlnk in
    \#* ) dbg "-- (#somelink)" ;;
    */ ) dbg "-- # (path/)" ;;
    */\#* ) dbg "-- # (path/#somelink)" ;;
    */*.md ) dbg "-- # (path/filename.md)" ;;
    */*.md\#* ) dbg "-- # (path/filename.md#somelink)" ;;
    *\#* )
      dbg "-- # (path#somelink) -> (path/#somelink)"
      if [[ $rlnk =~ ^(.*)#(.*)$ ]] ; then
        dbg "-- $rlnk -> ${BASH_REMATCH[1]}/#${BASH_REMATCH[2]}"
        rlnk="${BASH_REMATCH[1]}/#${BASH_REMATCH[2]}"
      fi
      ;;
    * )
      # Paths are tested relative to the current working directory.
      if [ -f "$rlnk" ] ; then
        dbg "-- # (path/someotherfile) $rlnk"
      elif [ -d "$rlnk" ] ; then
        dbg "-- # (path) -> (path/)"
        rlnk="$rlnk/"
      else
        echo "-- ERROR: $f - $alnk is neither a file nor a directory. Giving up!"
        EXITCODE=1
        return
      fi
      ;;
  esac

  # Split the target into directory and remainder. A target without any "/"
  # lives in the top directory.
  abspath="."
  rest="$rlnk"
  if [[ $rlnk =~ ^(.*)/([^/]*)$ ]] ; then
    abspath="${BASH_REMATCH[1]}"
    rest="${BASH_REMATCH[2]}"
    dbg "-- Target file is at $abspath"
  fi

  if relativelink=$(realpath --relative-to="$fpath" "$abspath") ; then
    # Escape "/" because it is the delimiter of the sed s-command below.
    srch=$(printf '%s\n' "$lnkinfile" | sed 's/\//\\\//g')
    if [ "$relativelink" = "." ] ; then
      rplc=$(printf '%s\n' "$rest" | sed 's/\//\\\//g')
    else
      rplc=$(printf '%s\n' "$relativelink/$rest" | sed 's/\//\\\//g')
    fi
    # The command string is eval'ed by fix, so shell-quote the file name.
    printf -v target '%q' "$f"
    fix "sed -i 's/($srch)/($rplc)/g' $target"
  else
    echo "-- ERROR: $f - Can't determine relative path of $alnk"
    # Reported as an error, so it has to be reflected in the exit status too.
    EXITCODE=1
  fi
}
#######################################
# Check that a URL can be fetched.
# Globals:   TESTURLS (read) - 0, unset or empty: skip the check and succeed
# Arguments: $1 - URL to fetch
# Outputs:   curl error messages on stderr (-S)
# Returns:   0 if the check is disabled or the URL was fetched, 1 otherwise
#######################################
testURL () {
  if [ "${TESTURLS:-0}" -eq 0 ] ; then return 0 ; fi
  dbg "-- Testing URL $1"
  # --fail:     without it curl exits 0 on HTTP errors such as 404, so dead
  #             links would be reported as fine.
  # --location: follow redirects so the final target is what gets checked.
  if curl -sS --fail --location "$1" > /dev/null ; then
    return 0
  fi
  return 1
}
#######################################
# Check if the header referred to by an internal link exists in a file.
# The file is normalised (the characters , _ . : ? space ` and - are removed)
# and the link, with its dashes removed, must then match a whole line
# case-insensitively, optionally preceded by further "#" characters.
# Globals:   EXITCODE (set to 1 when the header is missing)
# Arguments: $1 - file the link was found in (only used in the error message)
#            $2 - file that should contain the header
#            $3 - the anchor, including its leading "#"
# Outputs:   "-- ERROR: ..." on stdout when the header is missing
# Returns:   0 if the header was found, 1 otherwise
#######################################
testinternal () {
  local ff=${1} ifile=${2} ilnk=${3}
  local header=${ilnk//-/}
  dbg "-- Searching for \"$header\" in $ifile"
  # "#" is an ordinary character in a basic regular expression. Writing it as
  # "\#" is undefined by POSIX and makes recent GNU grep print a
  # "stray \ before '#'" warning for every link checked.
  # NOTE(review): $header is used as a regular expression, so an anchor that
  # contains regex metacharacters is interpreted as a pattern.
  if tr -d ',_.:? `' < "$ifile" | sed 's/-//g' | grep -i "^#*${header}\$" > /dev/null ; then
    dbg "-- $ilnk found in $ifile"
    return 0
  fi
  echo "-- ERROR: $ff - $ilnk header not found in file $ifile"
  EXITCODE=1
  return 1
}
#######################################
# Verify that the target of a link is an existing regular file.
# Globals:   EXITCODE (set to 1 when the target is missing)
# Arguments: $1 - file the link was found in (only used in the error message)
#            $2 - path that is expected to exist
# Outputs:   "-- ERROR: ..." on stdout when the target is missing
# Returns:   0 if $2 is a regular file, 1 otherwise
#######################################
testf () {
  local origin=$1 target=$2
  # Deal with the failure first; the success path falls through.
  if [ ! -f "$target" ] ; then
    echo "-- ERROR: $origin - $target does not exist"
    EXITCODE=1
    return 1
  fi
  dbg "-- $target exists"
  return 0
}
#######################################
# Check a relative link and, when it is valid in the repo, propose (or apply,
# through fix) the form the generated html site needs.
# Globals:   GENERATOR_DIR (read) - the fix targets $GENERATOR_DIR/src/<file>
#            EXITCODE (set to 1 on error, also by testf/testinternal)
# Arguments: $1 - markdown file that contains the link
#            $2 - the relative link as written in the file
# Outputs:   "-- ERROR: ..." lines on stdout; the sed command via fix
#######################################
ck_netdata_relative () {
  local f=${1} rlnk=${2}
  local fpath="." fname="$f"
  # s stays empty when the link needs no change (or is broken).
  local TRGT="" LNK="" s="" srch rplc target
  dbg "-- Checking relative link $rlnk"

  # First ensure that the link works in the repo, then try to fix it in htmldocs
  if [[ $f =~ ^(.*)/([^/]*)$ ]] ; then
    fpath="${BASH_REMATCH[1]}"
    fname="${BASH_REMATCH[2]}"
    dbg "-- Current file is at $fpath"
  else
    dbg "-- Current file is at root directory"
  fi

  # Cases to handle:
  # (#somelink)
  # (path/)
  # (path/#somelink)
  # (path/filename.md) -> htmldoc (path/filename/)
  # (path/filename.md#somelink) -> htmldoc (path/filename/#somelink)
  # (path#somelink) -> htmldoc (path/#somelink)
  # (path/someotherfile) -> htmldoc (absolutelink)
  # (path) -> htmldoc (path/)
  #
  # In several branches a link found in a file that is not a README.md gets
  # an extra "../" prepended.
  # NOTE(review): the two *.md branches only produce a fix when the link
  # contains a "/"; a bare (filename.md) link is validated but not rewritten.
  case "$rlnk" in
    \#* )
      dbg "-- # (#somelink)"
      testinternal "$f" "$f" "$rlnk"
      ;;
    */ )
      dbg "-- # (path/)"
      TRGT="$fpath/${rlnk}README.md"
      if testf "$f" "$TRGT" ; then
        if [ "$fname" != "README.md" ] ; then s="../$rlnk" ; fi
      fi
      ;;
    */\#* )
      dbg "-- # (path/#somelink)"
      if [[ $rlnk =~ ^(.*)/#(.*)$ ]] ; then
        TRGT="$fpath/${BASH_REMATCH[1]}/README.md"
        LNK="#${BASH_REMATCH[2]}"
        dbg "-- Look for $LNK in $TRGT"
        if testf "$f" "$TRGT" && testinternal "$f" "$TRGT" "$LNK" ; then
          if [ "$fname" != "README.md" ] ; then s="../$rlnk" ; fi
        fi
      fi
      ;;
    *.md )
      dbg "-- # (path/filename.md) -> htmldoc (path/filename/)"
      if testf "$f" "$fpath/$rlnk" ; then
        if [[ $rlnk =~ ^(.*)/(.*).md$ ]] ; then
          if [ "${BASH_REMATCH[2]}" = "README" ] ; then
            s="../${BASH_REMATCH[1]}/"
          else
            s="../${BASH_REMATCH[1]}/${BASH_REMATCH[2]}/"
          fi
          if [ "$fname" != "README.md" ] ; then s="../$s" ; fi
        fi
      fi
      ;;
    *.md\#* )
      dbg "-- # (path/filename.md#somelink) -> htmldoc (path/filename/#somelink)"
      if [[ $rlnk =~ ^(.*)#(.*)$ ]] ; then
        TRGT="$fpath/${BASH_REMATCH[1]}"
        LNK="#${BASH_REMATCH[2]}"
        if testf "$f" "$TRGT" && testinternal "$f" "$TRGT" "$LNK" ; then
          # Match against this function's own argument, not against a
          # variable that happens to be set by the caller.
          if [[ $rlnk =~ ^(.*)/(.*).md#(.*)$ ]] ; then
            if [ "${BASH_REMATCH[2]}" = "README" ] ; then
              s="../${BASH_REMATCH[1]}/#${BASH_REMATCH[3]}"
            else
              s="../${BASH_REMATCH[1]}/${BASH_REMATCH[2]}/#${BASH_REMATCH[3]}"
            fi
            if [ "$fname" != "README.md" ] ; then s="../$s" ; fi
          fi
        fi
      fi
      ;;
    *\#* )
      dbg "-- # (path#somelink) -> (path/#somelink)"
      if [[ $rlnk =~ ^(.*)#(.*)$ ]] ; then
        # Keep the captures: helpers called below must not be relied upon to
        # leave BASH_REMATCH untouched.
        local dir="${BASH_REMATCH[1]}" anchor="${BASH_REMATCH[2]}"
        TRGT="$fpath/$dir/README.md"
        LNK="#$anchor"
        if testf "$f" "$TRGT" && testinternal "$f" "$TRGT" "$LNK" ; then
          s="$dir/#$anchor"
          if [ "$fname" != "README.md" ] ; then s="../$s" ; fi
        fi
      fi
      ;;
    * )
      if [ -f "$fpath/$rlnk" ] ; then
        # Non-markdown files are not part of the html site: link to GitHub.
        dbg "-- # (path/someotherfile) $rlnk"
        if [ "$fpath" = "." ] ; then
          s="https://github.com/netdata/netdata/tree/master/$rlnk"
        else
          s="https://github.com/netdata/netdata/tree/master/$fpath/$rlnk"
        fi
      elif [ -d "$fpath/$rlnk" ] ; then
        dbg "-- # (path) -> htmldoc (path/)"
        if testf "$f" "$fpath/$rlnk/README.md" ; then
          s="$rlnk/"
          if [ "$fname" != "README.md" ] ; then s="../$s" ; fi
        fi
      else
        echo "-- ERROR: $f - $rlnk is neither a file or a directory. Giving up!"
        EXITCODE=1
      fi
      ;;
  esac

  if [[ -n $s ]] ; then
    # Escape "/" because it is the delimiter of the sed s-command below.
    srch=$(printf '%s\n' "$rlnk" | sed 's/\//\\\//g')
    rplc=$(printf '%s\n' "$s" | sed 's/\//\\\//g')
    # The command string is eval'ed by fix, so shell-quote the file name.
    printf -v target '%q' "$GENERATOR_DIR/src/$f"
    fix "sed -i 's/($srch)/($rplc)/g' $target"
  fi
}
#######################################
# Scan one markdown file for "](target)" links and dispatch every link to
# the check that matches its type.
# Globals:   CHKWIKI, CHKABSOLUTE, CHKEXTERNAL, CHKRELATIVE (read) - 1 enables
#            the respective check
#            EXITCODE (set to 1 when a broken link is found)
# Arguments: $1 - markdown file to scan
# Outputs:   warnings and errors on stdout
#######################################
checklinks () {
  local f=$1
  local word rest lnk
  local -a words
  # A markdown link target: "](" + anything without parentheses/spaces + ")".
  local link_re='\]\(([^() ]*)\)'

  dbg "Checking $f"
  # read -a splits the line into words WITHOUT pathname expansion, so list
  # bullets ("*") and other glob characters in the text are taken literally.
  # The "|| [ ... ]" part keeps a final line that has no trailing newline.
  while read -r -a words || [ "${#words[@]}" -gt 0 ] ; do
    for word in "${words[@]}" ; do
      # A single word may hold several links, e.g. [![badge](img)](target):
      # take the leftmost one, cut it off and look again.
      rest=$word
      while [[ $rest =~ $link_re ]] ; do
        lnk="${BASH_REMATCH[1]}"
        rest=${rest#*"${BASH_REMATCH[0]}"}
        if [ -z "$lnk" ] ; then continue ; fi
        dbg "-$lnk"
        case "$lnk" in
          mailto:* ) dbg "-- Mailto link, ignoring" ;;
          https://github.com/netdata/netdata/wiki* )
            dbg "-- Wiki Link $lnk"
            if [ "$CHKWIKI" -eq 1 ] ; then
              echo "-- WARNING: $f - $lnk points to the wiki. Please replace it manually"
            fi
            ;;
          https://github.com/netdata/netdata/????/master* )
            dbg "-- Absolute link $lnk"
            if [ "$CHKABSOLUTE" -eq 1 ] ; then ck_netdata_absolute "$f" "$lnk" "$lnk" ; fi
            ;;
          http* )
            dbg "-- External link $lnk"
            if [ "$CHKEXTERNAL" -eq 1 ] ; then
              if ! testURL "$lnk" ; then
                echo "-- ERROR: $f - $lnk is a broken link"
                EXITCODE=1
              fi
            fi
            ;;
          * )
            dbg "-- Relative link $lnk"
            if [ "$CHKRELATIVE" -eq 1 ] ; then ck_netdata_relative "$f" "$lnk" ; fi
            ;;
        esac
      done
    done
  done < "$f"
}
# ---------------------------------------------------------------------------
# Command line handling and entry point.
# Every switch defaults to "off"; see printhelp for the meaning of the flags.
# ---------------------------------------------------------------------------
TESTURLS=0    # -u: really fetch URLs with curl
VERBOSE=0     # -v: print debugging messages
RECURSIVE=0   # -r: scan all *.md files below the current directory
EXECUTE=0     # -x: apply the fixes instead of only showing them
CHKWIKI=0     # -w: warn about links to the wiki
CHKABSOLUTE=0 # -b: check absolute links to the netdata repo
CHKEXTERNAL=0 # -e: check external links
CHKRELATIVE=0 # -l: check relative links
file=""       # -f: single file to check (reset so the environment cannot leak in)

# The leading ":" selects getopts' silent mode; unknown options and missing
# arguments both end up in the "*" branch.
while getopts :f:rxuvwbela option ; do
  case "$option" in
    f) file=$OPTARG ;;
    r) RECURSIVE=1 ;;
    x) EXECUTE=1 ;;
    u) TESTURLS=1 ;;
    v) VERBOSE=1 ;;
    w) CHKWIKI=1 ;;
    b) CHKABSOLUTE=1 ;;
    e) CHKEXTERNAL=1 ;;
    l) CHKRELATIVE=1 ;;
    a)
      CHKWIKI=1
      CHKABSOLUTE=1
      CHKEXTERNAL=1
      CHKRELATIVE=1
      ;;
    *)
      printhelp
      exit 1
      ;;
  esac
done

# Set to 1 by the check functions as soon as one link is reported broken.
EXITCODE=0

if [ -z "${file}" ] ; then
  # Exactly one of -r and -f must be given.
  if [ "$RECURSIVE" -eq 0 ] ; then
    printhelp
    exit 1
  fi
  # NUL-delimited names survive spaces and glob characters in paths. The list
  # arrives on descriptor 3 so that commands run by checklinks cannot consume
  # it from stdin. Only regular files are checked.
  while IFS= read -r -d '' -u 3 f ; do
    checklinks "$f"
  done 3< <(find . -type d \( -path "./${GENERATOR_DIR}" -o -path ./node_modules \) -prune -o -type f -name "*.md" -print0)
else
  if [ "$RECURSIVE" -eq 1 ] ; then
    printhelp
    exit 1
  fi
  checklinks "$file"
fi

exit "$EXITCODE"