[de1a2e]: / .github / workflows / lint-tsv

Download this file

49 lines (39 with data), 1.1 kB

#!/usr/bin/env -S awk -f
# lint .tsv files
# - make sure every record is the same length
# - make sure every field has content - even a null value should at least be '-'
# <nick@kousu.ca> 2021, public domain
#
# TODO: package this for public consumption
# TODO: make BSD-awk compatible

# Rules below run once per input record. State shared between rules:
#   NF_HEADER  - column count of the current file's header (its first record)
#   LINE_ERROR - set to 1 by any check that fails on the current record
#   ANY_ERROR  - set to 1 once any record in any file has failed a check
# Exit status is 1 if ANY_ERROR was set, otherwise awk's default (0).

# Describe where the current record is, for use in messages.
# With a single input (one file operand, or stdin) this is exactly "line N",
# keeping the output unchanged for that case. When more than one operand
# was given, the file name is appended so the per-file line number is
# unambiguous. ARGC counts the command name plus every operand.
function where() {
  return "line " FNR (ARGC > 2 ? " of " FILENAME : "")
}

# Fields are separated by exactly one tab; nothing else is a separator.
BEGIN { FS = "\t" }

# The first record of EACH file is its header and fixes the expected width
# for that file. FNR restarts at 1 for every file, whereas NR keeps counting
# across files and would only ever match the first file's header.
FNR == 1 { NF_HEADER = NF }

# Every record must have as many columns as its file's header.
NF != NF_HEADER {
  print "incorrect number of columns, " where()
  LINE_ERROR = 1
}

# Every field must be non-empty and carry no surrounding whitespace.
{
  for (i = 1; i <= NF; i++) {
    if ($i ~ /^[[:space:]]*$/) {
      # Empty, or nothing but whitespace: there is no value here at all.
      print "empty field, " where() ", column " i ". Please use '-' for null values."
      LINE_ERROR = 1
    }
    else if ($i ~ /^[[:space:]]|[[:space:]]$/) {
      # Has content, but padded with whitespace at the start and/or end.
      print "extraneous whitespace, " where() ", column " i ": '" $i "'"
      LINE_ERROR = 1
    }
  }
}

# After all checks for this record: echo the offending line once, remember
# that the run as a whole failed, and clear the per-record flag.
LINE_ERROR == 1 {
  print "errors in " where() ": \n\t'" $0 "'\n"
  ANY_ERROR = 1
  LINE_ERROR = 0 # reset for next time
}

END { if (ANY_ERROR) { exit 1 } }