--- a
+++ b/justfile
@@ -0,0 +1,346 @@
+set dotenv-load := true
+set positional-arguments
+
+
+export VIRTUAL_ENV  := env_var_or_default("VIRTUAL_ENV", ".venv")
+
+export BIN := VIRTUAL_ENV + if os_family() == "unix" { "/bin" } else { "/Scripts" }
+export PIP := BIN + if os_family() == "unix" { "/python -m pip" } else { "/python.exe -m pip" }
+
+
+alias help := list
+
+# List available commands
+list:
+    @just --list --unsorted
+
+
+# Ensure valid virtualenv
+virtualenv:
+    #!/usr/bin/env bash
+    set -euo pipefail
+
+    # allow users to specify python version in .env
+    PYTHON_VERSION=${PYTHON_VERSION:-python3.11}
+
+    # create venv and upgrade pip
+    if [[ ! -d $VIRTUAL_ENV ]]; then
+      $PYTHON_VERSION -m venv $VIRTUAL_ENV
+      $PIP install --upgrade pip
+    fi
+
+
+# Run pip-compile with our standard settings
+pip-compile *ARGS: devenv
+    #!/usr/bin/env bash
+    set -euo pipefail
+
+    $BIN/pip-compile --allow-unsafe --generate-hashes --strip-extras "$@"
+
+
+update-dependencies: devenv
+  just pip-compile -U requirements.prod.in
+  just pip-compile -U requirements.dev.in
+
+# Ensure dev and prod requirements installed and up to date
+devenv: virtualenv
+    #!/usr/bin/env bash
+    set -euo pipefail
+
+    for req_file in requirements.dev.txt requirements.prod.txt pyproject.minimal.toml; do
+      # If we've installed this file before and the original hasn't been
+      # modified since then bail early
+      record_file="$VIRTUAL_ENV/$req_file"
+      if [[ -e "$record_file" && "$record_file" -nt "$req_file" ]]; then
+        continue
+      fi
+
+      if cmp --silent "$req_file" "$record_file"; then
+        # If the timestamp has been changed but not the contents (as can happen
+        # when switching branches) then just update the timestamp
+        touch "$record_file"
+      else
+        # Otherwise actually install the requirements
+
+        if [[ "$req_file" == *.txt ]]; then
+          # --no-deps is recommended when using hashes, and also works around a
+          # bug with constraints and hashes. See:
+          # https://pip.pypa.io/en/stable/topics/secure-installs/#do-not-use-setuptools-directly
+          $PIP install --no-deps --requirement "$req_file"
+        elif [[ "$req_file" == *.toml ]]; then
+          $PIP install --no-deps --editable "$(dirname "$req_file")"
+        else
+          echo "Unhandled file: $req_file"
+          exit 1
+        fi
+
+        # Make a record of what we just installed
+        cp "$req_file" "$record_file"
+      fi
+    done
+
+    if [[ ! -f .git/hooks/pre-commit ]]; then
+      $BIN/pre-commit install
+    fi
+
+
+# Lint and check formatting but don't modify anything
+check: devenv
+    #!/usr/bin/env bash
+
+    failed=0
+
+    check() {
+      # Display the command we're going to run, in bold and with the "$BIN/"
+      # prefix removed if present
+      echo -e "\e[1m=> ${1#"$BIN/"}\e[0m"
+      # Run it
+      eval $1
+      # Increment the counter on failure
+      if [[ $? != 0 ]]; then
+        failed=$((failed + 1))
+        # Add spacing to separate the error output from the next check
+        echo -e "\n"
+      fi
+    }
+
+    check "$BIN/ruff format --diff --quiet ."
+    check "$BIN/ruff check --output-format=full ."
+    check "docker run --rm -i ghcr.io/hadolint/hadolint:v2.12.0-alpine < Dockerfile"
+
+    if [[ $failed > 0 ]]; then
+      echo -en "\e[1;31m"
+      echo "   $failed checks failed"
+      echo -e "\e[0m"
+      exit 1
+    fi
+
+
+# Fix any automatically fixable linting or formatting errors
+fix: devenv
+    $BIN/ruff format .
+    $BIN/ruff check --fix .
+
+
+# Build the ehrQL docker image
+build-ehrql image_name="ehrql-dev" *args="":
+    #!/usr/bin/env bash
+    set -euo pipefail
+
+    export BUILD_DATE=$(date -u +'%y-%m-%dT%H:%M:%SZ')
+    export GITREF=$(git rev-parse --short HEAD)
+
+    [[ -v CI ]] && echo "::group::Build ehrql Docker image (click to view)" || echo "Build ehrql Docker image"
+    DOCKER_BUILDKIT=1 docker build . --build-arg BUILD_DATE="$BUILD_DATE" --build-arg GITREF="$GITREF" --tag {{ image_name }} {{ args }}
+    [[ -v CI ]] && echo "::endgroup::" || echo ""
+
+
+# Build a docker image tagged `ehrql:dev` that can be used in `project.yaml` for local testing
+build-ehrql-for-os-cli: build-ehrql
+    docker tag ehrql-dev ghcr.io/opensafely-core/ehrql:dev
+
+
+# Tear down the persistent docker containers we create to run tests again
+remove-database-containers:
+    docker rm --force ehrql-mssql ehrql-trino
+
+
+# Create an MSSQL docker container with the TPP database schema and print connection strings
+create-tpp-test-db: devenv
+    $BIN/python -m pytest -o python_functions=create tests/lib/create_tpp_test_db.py
+
+
+# Open an interactive SQL Server shell running against MSSQL
+connect-to-mssql:
+    # Only pass '-t' argument to Docker if stdin is a TTY so you can pipe a SQL
+    # file to this commmand as well as using it interactively.
+    docker exec -i `[ -t 0 ] && echo '-t'` \
+        ehrql-mssql \
+            /opt/mssql-tools18/bin/sqlcmd -C -S localhost -U sa -P 'Your_password123!' -d test
+
+
+# Open an interactive trino shell
+connect-to-trino:
+    docker exec -it ehrql-trino trino --catalog trino --schema default
+
+
+###################################################################
+# Testing targets
+###################################################################
+
+# Run all or some pytest tests
+test *ARGS: devenv
+    $BIN/python -m pytest "$@"
+
+
+# Run the acceptance tests only
+test-acceptance *ARGS: devenv
+    $BIN/python -m pytest tests/acceptance "$@"
+
+
+# Run the backend validation tests only
+test-backend-validation *ARGS: devenv
+    $BIN/python -m pytest tests/backend_validation "$@"
+
+
+# Run the ehrql-in-docker tests only
+test-docker *ARGS: devenv
+    $BIN/python -m pytest tests/docker "$@"
+
+
+# Run the docs examples tests only
+test-docs-examples *ARGS: devenv
+    $BIN/python -m pytest tests/docs "$@"
+
+
+# Run the integration tests only
+test-integration *ARGS: devenv
+    $BIN/python -m pytest tests/integration "$@"
+
+
+# Run the spec tests only
+test-spec *ARGS: devenv
+    $BIN/python -m pytest tests/spec "$@"
+
+
+# Run the unit tests only
+test-unit *ARGS: devenv
+    $BIN/python -m pytest tests/unit "$@"
+    $BIN/python -m pytest --doctest-modules ehrql
+
+
+# Run the generative tests only, configured to use more than the tiny default
+# number of examples. Optional args are passed to pytest.
+#
+# Set GENTEST_DEBUG env var to see stats.
+# Set GENTEST_EXAMPLES to change the number of examples generated.
+# Set GENTEST_MAX_DEPTH to change the depth of generated query trees.
+#
+# Run generative tests using more than the small deterministic set of examples used in CI
+test-generative *ARGS: devenv
+    GENTEST_EXAMPLES=${GENTEST_EXAMPLES:-200} \
+      $BIN/python -m pytest tests/generative "$@"
+
+
+# Run all test as they will be run in  CI (checking code coverage etc)
+@test-all *ARGS: devenv generate-docs
+    #!/usr/bin/env bash
+    set -euo pipefail
+
+    GENTEST_DERANDOMIZE=t \
+    GENTEST_EXAMPLES=${GENTEST_EXAMPLES:-100} \
+    GENTEST_CHECK_IGNORED_ERRORS=t \
+      $BIN/python -m pytest \
+        --cov=ehrql \
+        --cov=tests \
+        --cov-report=html \
+        --cov-report=term-missing:skip-covered \
+        "$@"
+    $BIN/python -m pytest --doctest-modules ehrql
+
+
+# Convert a raw failing example from Hypothesis's output into a simplified test case
+gentest-example-simplify *ARGS: devenv
+    $BIN/python -m tests.lib.gentest_example_simplify "$@"
+
+
+# Run a generative test example defined in the supplied file
+gentest-example-run example *ARGS: devenv
+    GENTEST_EXAMPLE_FILE='{{example}}' \
+    $BIN/python -m pytest \
+        tests/generative/test_query_model.py::test_query_model_example_file \
+            "$@"
+
+
+generate-docs OUTPUT_DIR="docs/includes/generated_docs": devenv
+    $BIN/python -m ehrql.docs {{ OUTPUT_DIR }}
+    echo "Generated data for documentation in {{ OUTPUT_DIR }}"
+
+
+# Run the documentation server: to configure the port, append: ---dev-addr localhost:<port>
+docs-serve *ARGS: devenv generate-docs
+    # Run the MkDocs server with `--clean` to enforce the `exclude_docs` option.
+    # This removes false positives that pertain to the autogenerated documentation includes.
+    "$BIN"/mkdocs serve --clean "$@"
+
+
+# Build the documentation
+docs-build *ARGS: devenv generate-docs
+    "$BIN"/mkdocs build --clean --strict "$@"
+
+
+# Run the snippet tests
+docs-test: devenv
+    echo "Not implemented here"
+
+
+# Check the dataset public docs are current
+docs-check-generated-docs-are-current: generate-docs
+    #!/usr/bin/env bash
+    set -euo pipefail
+
+    # https://stackoverflow.com/questions/3878624/how-do-i-programmatically-determine-if-there-are-uncommitted-changes
+    # git diff --exit-code won't pick up untracked files, which we also want to check for.
+    if [[ -z $(git status --porcelain ./docs/includes/generated_docs/; git clean -nd ./docs/includes/generated_docs/) ]]
+    then
+      echo "Generated docs directory is current and free of other files/directories."
+    else
+      echo "Generated docs directory contains files/directories not in the repository."
+      git diff ./docs/includes/generated_docs/; git clean -n ./docs/includes/generated_docs/
+      exit 1
+    fi
+
+
+update-external-studies: devenv
+    $BIN/python -m tests.acceptance.update_external_studies
+
+
+update-tpp-schema: devenv
+    #!/usr/bin/env bash
+    set -euo pipefail
+
+    echo 'Fetching latest tpp_schema.csv'
+    $BIN/python -m tests.lib.update_tpp_schema fetch
+    echo 'Building new tpp_schema.py'
+    $BIN/python -m tests.lib.update_tpp_schema build
+
+
+update-pledge: devenv
+    #!/usr/bin/env bash
+    set -euo pipefail
+    URL_RECORD_FILE="bin/cosmopolitan-release-url.txt"
+    ZIP_URL="$(
+      $BIN/python -c \
+        'import requests; print([
+            asset["browser_download_url"]
+            for release in requests.get("https://api.github.com/repos/jart/cosmopolitan/releases").json()
+            for asset in release["assets"]
+            if asset["name"].startswith("cosmos-") and asset["name"].endswith(".zip")
+        ][0])'
+    )"
+    echo "Latest Cosmopolitation release: $ZIP_URL"
+    if grep -Fxqs "$ZIP_URL" "$URL_RECORD_FILE"; then
+       echo "Already up to date."
+       exit 0
+    fi
+
+    if [[ "$(uname -s)" != "Linux" ]]; then
+      echo "This command can only be run on a Linux system because we need to"
+      echo " \"assimilate\" `pledge` to be a regular Linux executable"
+      exit 1
+    fi
+
+    echo "Downloading ..."
+    TMP_FILE="$(mktemp)"
+    curl --location --output "$TMP_FILE" "$ZIP_URL"
+    unzip -o -j "$TMP_FILE" bin/pledge -d bin/
+    rm "$TMP_FILE"
+
+    # Rewrite the file header so it becomes a native Linux executable which we
+    # can run directly without needing a shell. See:
+    # https://justine.lol/apeloader/
+    echo "Assimilating ..."
+    sh bin/pledge --assimilate
+
+    echo "Complete."
+    echo "$ZIP_URL" > "$URL_RECORD_FILE"