From 02150022ab81355e5f47ee4f872f9cabf4d3d66a Mon Sep 17 00:00:00 2001 From: Ole Herman Schumacher Elgesem Date: Fri, 3 Apr 2026 02:07:26 +0200 Subject: [PATCH 1/8] Added linting tests for namespaces Signed-off-by: Ole Herman Schumacher Elgesem --- tests/lint/008_namespace.cf | 16 ++++++++++++++++ tests/lint/008_namespace.output.txt | 7 +++++++ tests/lint/008_namespace.x.cf | 16 ++++++++++++++++ 3 files changed, 39 insertions(+) create mode 100644 tests/lint/008_namespace.cf create mode 100644 tests/lint/008_namespace.output.txt create mode 100644 tests/lint/008_namespace.x.cf diff --git a/tests/lint/008_namespace.cf b/tests/lint/008_namespace.cf new file mode 100644 index 0000000..301e8e1 --- /dev/null +++ b/tests/lint/008_namespace.cf @@ -0,0 +1,16 @@ +body file control +{ + namespace => "mylib"; +} + +bundle agent target(a) +{ + reports: + "Hello, $(a)"; +} + +bundle agent helper +{ + vars: + "x" string => mylib:target("arg"); +} diff --git a/tests/lint/008_namespace.output.txt b/tests/lint/008_namespace.output.txt new file mode 100644 index 0000000..0ed3872 --- /dev/null +++ b/tests/lint/008_namespace.output.txt @@ -0,0 +1,7 @@ + + vars: + "x" string => default:target("arg"); + ^------------^ +Error: Call to unknown function / bundle / body 'default:target' at at tests/lint/008_namespace.x.cf:15:19 +FAIL: tests/lint/008_namespace.x.cf (1 errors) +Failure, 1 errors in total. diff --git a/tests/lint/008_namespace.x.cf b/tests/lint/008_namespace.x.cf new file mode 100644 index 0000000..1876c0a --- /dev/null +++ b/tests/lint/008_namespace.x.cf @@ -0,0 +1,16 @@ +body file control +{ + namespace => "mylib"; +} + +bundle agent target(a) +{ + reports: + "Hello, $(a)"; +} + +bundle agent helper +{ + vars: + "x" string => default:target("arg"); +} From 895190d557d553405fc60213bc491982b3b31f0b Mon Sep 17 00:00:00 2001 From: Ole Herman Schumacher Elgesem Date: Fri, 3 Apr 2026 02:07:41 +0200 Subject: [PATCH 2/8] tests: Corrected plural s in output Signed-off-by: Ole Herman Schumacher Elgesem --- tests/lint/002_ifvarclass.output.txt | 4 ++-- tests/lint/003_deprecated_promise_type.output.txt | 4 ++-- tests/lint/004_bundle_name_lowercase.output.txt | 4 ++-- tests/lint/005_bundle_type.output.txt | 4 ++-- tests/lint/006_syntax_error.output.txt | 4 ++-- tests/lint/007_empty_file.output.txt | 4 ++-- tests/lint/008_namespace.output.txt | 4 ++-- 7 files changed, 14 insertions(+), 14 deletions(-) diff --git a/tests/lint/002_ifvarclass.output.txt b/tests/lint/002_ifvarclass.output.txt index af068bf..6ffa1cb 100644 --- a/tests/lint/002_ifvarclass.output.txt +++ b/tests/lint/002_ifvarclass.output.txt @@ -3,5 +3,5 @@ ifvarclass => "cfengine"; ^--------^ Deprecation: Use 'if' instead of 'ifvarclass' at tests/lint/002_ifvarclass.x.cf:5:7 -FAIL: tests/lint/002_ifvarclass.x.cf (1 errors) -Failure, 1 errors in total. +FAIL: tests/lint/002_ifvarclass.x.cf (1 error) +Failure, 1 error in total. diff --git a/tests/lint/003_deprecated_promise_type.output.txt b/tests/lint/003_deprecated_promise_type.output.txt index 94f2fbd..36f78b3 100644 --- a/tests/lint/003_deprecated_promise_type.output.txt +++ b/tests/lint/003_deprecated_promise_type.output.txt @@ -3,5 +3,5 @@ defaults: ^-------^ Deprecation: Promise type 'defaults' is deprecated at tests/lint/003_deprecated_promise_type.x.cf:3:3 -FAIL: tests/lint/003_deprecated_promise_type.x.cf (1 errors) -Failure, 1 errors in total. +FAIL: tests/lint/003_deprecated_promise_type.x.cf (1 error) +Failure, 1 error in total. diff --git a/tests/lint/004_bundle_name_lowercase.output.txt b/tests/lint/004_bundle_name_lowercase.output.txt index 7e5e3f5..484d04e 100644 --- a/tests/lint/004_bundle_name_lowercase.output.txt +++ b/tests/lint/004_bundle_name_lowercase.output.txt @@ -2,5 +2,5 @@ bundle agent MyBundle ^------^ Convention: Bundle name should be lowercase at tests/lint/004_bundle_name_lowercase.x.cf:1:14 -FAIL: tests/lint/004_bundle_name_lowercase.x.cf (1 errors) -Failure, 1 errors in total. +FAIL: tests/lint/004_bundle_name_lowercase.x.cf (1 error) +Failure, 1 error in total. diff --git a/tests/lint/005_bundle_type.output.txt b/tests/lint/005_bundle_type.output.txt index a1f4412..89b1b44 100644 --- a/tests/lint/005_bundle_type.output.txt +++ b/tests/lint/005_bundle_type.output.txt @@ -2,5 +2,5 @@ bundle notavalidtype my_bundle ^-----------^ Error: Bundle type must be one of (agent, common, monitor, server, edit_line, edit_xml), not 'notavalidtype' at tests/lint/005_bundle_type.x.cf:1:8 -FAIL: tests/lint/005_bundle_type.x.cf (1 errors) -Failure, 1 errors in total. +FAIL: tests/lint/005_bundle_type.x.cf (1 error) +Failure, 1 error in total. diff --git a/tests/lint/006_syntax_error.output.txt b/tests/lint/006_syntax_error.output.txt index 5e55fa8..de31969 100644 --- a/tests/lint/006_syntax_error.output.txt +++ b/tests/lint/006_syntax_error.output.txt @@ -3,5 +3,5 @@ reports ^-----^ Error: Syntax error at tests/lint/006_syntax_error.x.cf:3:3 -FAIL: tests/lint/006_syntax_error.x.cf (1 errors) -Failure, 1 errors in total. +FAIL: tests/lint/006_syntax_error.x.cf (1 error) +Failure, 1 error in total. diff --git a/tests/lint/007_empty_file.output.txt b/tests/lint/007_empty_file.output.txt index f4ab399..8d17cd1 100644 --- a/tests/lint/007_empty_file.output.txt +++ b/tests/lint/007_empty_file.output.txt @@ -1,3 +1,3 @@ Error: Empty policy file 'tests/lint/007_empty_file.x.cf' -FAIL: tests/lint/007_empty_file.x.cf (1 errors) -Failure, 1 errors in total. +FAIL: tests/lint/007_empty_file.x.cf (1 error) +Failure, 1 error in total. diff --git a/tests/lint/008_namespace.output.txt b/tests/lint/008_namespace.output.txt index 0ed3872..c4ce8ac 100644 --- a/tests/lint/008_namespace.output.txt +++ b/tests/lint/008_namespace.output.txt @@ -3,5 +3,5 @@ "x" string => default:target("arg"); ^------------^ Error: Call to unknown function / bundle / body 'default:target' at at tests/lint/008_namespace.x.cf:15:19 -FAIL: tests/lint/008_namespace.x.cf (1 errors) -Failure, 1 errors in total. +FAIL: tests/lint/008_namespace.x.cf (1 error) +Failure, 1 error in total. From 8173590cbbcbdeefeedfd77ae969c1e26fd920c2 Mon Sep 17 00:00:00 2001 From: Ole Herman Schumacher Elgesem Date: Fri, 3 Apr 2026 02:07:44 +0200 Subject: [PATCH 3/8] Adjusted linting tests to use similar file name conventions as format tests Signed-off-by: Ole Herman Schumacher Elgesem --- .gitignore | 1 + ...{002_ifvarclass.output.txt => 002_ifvarclass.expected.txt} | 0 ...pe.output.txt => 003_deprecated_promise_type.expected.txt} | 0 ...case.output.txt => 004_bundle_name_lowercase.expected.txt} | 0 ...05_bundle_type.output.txt => 005_bundle_type.expected.txt} | 0 ..._syntax_error.output.txt => 006_syntax_error.expected.txt} | 0 ...{007_empty_file.output.txt => 007_empty_file.expected.txt} | 0 .../{008_namespace.output.txt => 008_namespace.expected.txt} | 0 tests/run-lint-tests.sh | 4 ++-- 9 files changed, 3 insertions(+), 2 deletions(-) rename tests/lint/{002_ifvarclass.output.txt => 002_ifvarclass.expected.txt} (100%) rename tests/lint/{003_deprecated_promise_type.output.txt => 003_deprecated_promise_type.expected.txt} (100%) rename tests/lint/{004_bundle_name_lowercase.output.txt => 004_bundle_name_lowercase.expected.txt} (100%) rename tests/lint/{005_bundle_type.output.txt => 005_bundle_type.expected.txt} (100%) rename tests/lint/{006_syntax_error.output.txt => 006_syntax_error.expected.txt} (100%) rename tests/lint/{007_empty_file.output.txt => 007_empty_file.expected.txt} (100%) rename tests/lint/{008_namespace.output.txt => 008_namespace.expected.txt} (100%) diff --git a/.gitignore b/.gitignore index 178253e..2395d9f 100644 --- a/.gitignore +++ b/.gitignore @@ -11,6 +11,7 @@ __pycache__ /tmp/ *.log *.output.cf +*.output.txt git_diff_exists commit_message.txt black_output.txt diff --git a/tests/lint/002_ifvarclass.output.txt b/tests/lint/002_ifvarclass.expected.txt similarity index 100% rename from tests/lint/002_ifvarclass.output.txt rename to tests/lint/002_ifvarclass.expected.txt diff --git a/tests/lint/003_deprecated_promise_type.output.txt b/tests/lint/003_deprecated_promise_type.expected.txt similarity index 100% rename from tests/lint/003_deprecated_promise_type.output.txt rename to tests/lint/003_deprecated_promise_type.expected.txt diff --git a/tests/lint/004_bundle_name_lowercase.output.txt b/tests/lint/004_bundle_name_lowercase.expected.txt similarity index 100% rename from tests/lint/004_bundle_name_lowercase.output.txt rename to tests/lint/004_bundle_name_lowercase.expected.txt diff --git a/tests/lint/005_bundle_type.output.txt b/tests/lint/005_bundle_type.expected.txt similarity index 100% rename from tests/lint/005_bundle_type.output.txt rename to tests/lint/005_bundle_type.expected.txt diff --git a/tests/lint/006_syntax_error.output.txt b/tests/lint/006_syntax_error.expected.txt similarity index 100% rename from tests/lint/006_syntax_error.output.txt rename to tests/lint/006_syntax_error.expected.txt diff --git a/tests/lint/007_empty_file.output.txt b/tests/lint/007_empty_file.expected.txt similarity index 100% rename from tests/lint/007_empty_file.output.txt rename to tests/lint/007_empty_file.expected.txt diff --git a/tests/lint/008_namespace.output.txt b/tests/lint/008_namespace.expected.txt similarity index 100% rename from tests/lint/008_namespace.output.txt rename to tests/lint/008_namespace.expected.txt diff --git a/tests/run-lint-tests.sh b/tests/run-lint-tests.sh index bf4deb4..624f6ff 100644 --- a/tests/run-lint-tests.sh +++ b/tests/run-lint-tests.sh @@ -21,12 +21,12 @@ for file in tests/lint/*.cf; do # - Fail (non-zero exit code) # - Output the correct error message - expected="$(echo $file | sed 's/\.x\.cf$/.output.txt/')" + expected="$(echo $file | sed 's/\.x\.cf$/.expected.txt/')" if [ ! -f "$expected" ]; then echo "FAIL: Missing expected output file: $expected" exit 1 fi - output="tmp/$(basename $file .x.cf).lint-output.txt" + output="tests/lint/$(basename $file .x.cf).output.txt" if cfengine lint "$file" > "$output" 2>&1; then echo "FAIL: $file - expected lint failure but got success" exit 1 From f9122be76dca966ab99dfe219705341b2c6c72e4 Mon Sep 17 00:00:00 2001 From: Ole Herman Schumacher Elgesem Date: Fri, 3 Apr 2026 15:42:25 +0200 Subject: [PATCH 4/8] Adjusted linting test 008 expectation Signed-off-by: Ole Herman Schumacher Elgesem --- tests/lint/008_namespace.expected.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/lint/008_namespace.expected.txt b/tests/lint/008_namespace.expected.txt index c4ce8ac..e7e3fd0 100644 --- a/tests/lint/008_namespace.expected.txt +++ b/tests/lint/008_namespace.expected.txt @@ -2,6 +2,6 @@ vars: "x" string => default:target("arg"); ^------------^ -Error: Call to unknown function / bundle / body 'default:target' at at tests/lint/008_namespace.x.cf:15:19 +Error: Call to unknown function / bundle / body 'default:target' at tests/lint/008_namespace.x.cf:15:19 FAIL: tests/lint/008_namespace.x.cf (1 error) Failure, 1 error in total. From c1af92d83a1881b443298315611ecdf7d6acfcee Mon Sep 17 00:00:00 2001 From: Ole Herman Schumacher Elgesem Date: Fri, 3 Apr 2026 02:07:30 +0200 Subject: [PATCH 5/8] Added makefile to make running tests easier Signed-off-by: Ole Herman Schumacher Elgesem --- Makefile | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 Makefile diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..99d5df0 --- /dev/null +++ b/Makefile @@ -0,0 +1,26 @@ +.PHONY: check format lint install + +default: check + +format: + uv tool run black . + prettier . --write + +lint: + uv tool run black --check . + uv tool run flake8 src/ --ignore=E203,W503,E722,E731 --max-complexity=100 --max-line-length=160 + uv tool run pyflakes src/ + uv tool run pyright src/ + +check: format lint install + uv tool run black --check . + uv tool run flake8 src/ --ignore=E203,W503,E722,E731 --max-complexity=100 --max-line-length=160 + uv tool run pyflakes src/ + uv tool run pyright src/ + uv run pytest + bash tests/run-lint-tests.sh + bash tests/run-format-tests.sh + bash tests/run-shell-tests.sh + +install: + pipx install --force --editable . From bb8dcdd611d87e005dcdfcc5a489077f4d608c6e Mon Sep 17 00:00:00 2001 From: Ole Herman Schumacher Elgesem Date: Fri, 3 Apr 2026 02:07:33 +0200 Subject: [PATCH 6/8] HACKING.md: Fixed outdated information Signed-off-by: Ole Herman Schumacher Elgesem --- HACKING.md | 98 ++++++++++++++++++++++++++---------------------------- 1 file changed, 47 insertions(+), 51 deletions(-) diff --git a/HACKING.md b/HACKING.md index f97bc94..df9a2bf 100644 --- a/HACKING.md +++ b/HACKING.md @@ -4,6 +4,53 @@ This document aims to have relevant information for people contributing to and m It is not necessary for users of the tool to know about these processes. For general user information, see the [README](./README.md). +## Code formatting + +We use automated code formatters to ensure consistent code style / indentation. +Please format Python code with [black](https://pypi.org/project/black/), and YAML and markdown files with [Prettier](https://prettier.io/). +For simplicity's sake, we don't have a custom configuration, we use the tool's defaults. + +If your editor does not do this automatically, you can run these tools from the command line: + +```bash +make format +``` + +## Installing from source: + +For developers working on CFEngine CLI, it is recommended to install an editable version of the tool: + +```bash +make install +``` + +Some of the tests require that you have the CLI installed (they run `cfengine` commands). + +## Running commands without installing + +You can also run commands without installing, using `uv`: + +```bash +uv run cfengine format +``` + +## Running tests + +Use the makefile command to run all linting and tests: + +```bash +make check +``` + +Running individual test suites: + +```bash +uv run pytest +bash tests/run-lint-tests.sh +bash tests/run-format-tests.sh +bash tests/run-shell-tests.sh +``` + ## Releasing new versions Releases are [automated using a GH Action](https://github.com/cfengine/cfengine-cli/blob/main/.github/workflows/pypi-publish.yml) @@ -79,62 +126,11 @@ Copy the token and paste it into the GitHub Secret named `PYPI_PASSWORD`. `PYPI_USERNAME` should be there already, you don't have to edit it, it is simply `__token__`. Don't store the token anywhere else - we generate new tokens if necessary. -## Code formatting - -We use automated code formatters to ensure consistent code style / indentation. -Please format Python code with [black](https://pypi.org/project/black/), and YAML and markdown files with [Prettier](https://prettier.io/). -For simplicity's sake, we don't have a custom configuration, we use the tool's defaults. - -If your editor does not do this automatically, you can run these tools from the command line: - -```bash -black . && prettier . --write -``` - -## Running commands during development - -This project uses `uv`. -This makes it easy to run commands without installing the project, for example: - -```bash -uv run cfengine format -``` - -## Installing from source: - -```bash -git fetch --all --tags -pip3 install . -``` - -## Running tests - -Unit tests: - -```bash -py.test -``` - -Shell tests (requires installing first): - -```bash -cat tests/shell/*.sh | bash -``` - ## Not implemented yet / TODOs - `cfengine run` - The command could automatically detect that you have CFEngine installed on a remote hub, and run it there instead (using `cf-remote`). - Handle when `cf-agent` is not installed, help users install. - Prompt / help users do what they meant (i.e. build and deploy and run). -- `cfengine format` - - Automatically break up and indent method calls, function calls, and nested function calls. - - Smarter placement of comments based on context. - - The command should be able to take a filename as an argument, and also operate using stdin and stdout. - (Receive file content on stdin, file type using command line arg, output formatted file to stdout). - - We can add a shortcut, `cfengine fmt`, since that matches other tools, like `deno`. -- `cfengine lint` - - The command should be able to take a filename as an argument, and also take file content from stdin. - - It would be nice if we refactored `validate_config()` in `cfbs` so it would take a simple dictionary (JSON) instead of a special CFBSConfig object. - Missing commands: - `cfengine install` - Install CFEngine packages / binaries (Wrapping `cf-remote install`). From fb6a57ce20f12065c3448c6e101392521711f427 Mon Sep 17 00:00:00 2001 From: Ole Herman Schumacher Elgesem Date: Fri, 3 Apr 2026 02:07:43 +0200 Subject: [PATCH 7/8] README.md: Adjusted plural s Signed-off-by: Ole Herman Schumacher Elgesem --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 2b80121..48d5fb8 100644 --- a/README.md +++ b/README.md @@ -60,8 +60,8 @@ When it finds a mistake, it points out where the problem is like this; ifvarclass => "cfengine"; ^--------^ Deprecation: Use 'if' instead of 'ifvarclass' at main.cf:5:7 -FAIL: main.cf (1 errors) -Failure, 1 errors in total. +FAIL: main.cf (1 error) +Failure, 1 error in total. ``` Note that since we use a different parser than `cf-agent` / `cf-promises`, they are not 100% in sync. From dd743a17b926b1a3a05f295cd03a2066a4dff7df Mon Sep 17 00:00:00 2001 From: Ole Herman Schumacher Elgesem Date: Fri, 3 Apr 2026 02:07:45 +0200 Subject: [PATCH 8/8] Refactored linting code to fix issues and make it more future proof Almost a complete rewrite. It had a few issues: - Finding user defined bodies / bundles / promise types did not work when specifying file instead of folder. - Code paths for linting arg, vs file, vs folder, vs snippet were getting complicated and confusing. - Linting rules (node checks) and state changes were getting complicated. I wanted to simplify these before we add more rules. - Probably some other issues I cannot remember. Fixing the issues was not trivial, so I decided to refactor and fix at the same time. Signed-off-by: Ole Herman Schumacher Elgesem --- src/cfengine_cli/commands.py | 14 +- src/cfengine_cli/docs.py | 84 ++- src/cfengine_cli/lint.py | 1004 +++++++++++++++++++++++----------- 3 files changed, 735 insertions(+), 367 deletions(-) diff --git a/src/cfengine_cli/commands.py b/src/cfengine_cli/commands.py index ead79a1..6db55f2 100644 --- a/src/cfengine_cli/commands.py +++ b/src/cfengine_cli/commands.py @@ -4,7 +4,7 @@ import json from cfengine_cli.profile import profile_cfengine, generate_callstack from cfengine_cli.dev import dispatch_dev_subcommand -from cfengine_cli.lint import lint_folder, lint_single_arg +from cfengine_cli.lint import lint_args from cfengine_cli.shell import user_command from cfengine_cli.paths import bin from cfengine_cli.version import cfengine_cli_version_string @@ -96,20 +96,16 @@ def format(names, line_length) -> int: def _lint(files, strict) -> int: if not files: - return lint_folder(".", strict) - - errors = 0 - - for file in files: - errors += lint_single_arg(file, strict) - - return errors + return lint_args(["."], strict) + return lint_args(files, strict) def lint(files, strict) -> int: errors = _lint(files, strict) if errors == 0: print("Success, no errors found.") + elif errors == 1: + print("Failure, 1 error in total.") else: print(f"Failure, {errors} errors in total.") return errors diff --git a/src/cfengine_cli/docs.py b/src/cfengine_cli/docs.py index e2f0dcf..f0b7a80 100644 --- a/src/cfengine_cli/docs.py +++ b/src/cfengine_cli/docs.py @@ -16,7 +16,8 @@ from cfbs.pretty import pretty_file from cfbs.utils import find -from cfengine_cli.lint import lint_folder, lint_policy_file +from cfengine_cli.format import format_policy_file +from cfengine_cli.lint import lint_args, lint_policy_file_snippet from cfengine_cli.utils import UserError IGNORED_DIRS = [".git"] @@ -127,7 +128,7 @@ def fn_check_syntax( first_line, _last_line, snippet_number, - prefix=None, + prefix, ): snippet_abs_path = os.path.abspath(snippet_path) @@ -138,8 +139,13 @@ def fn_check_syntax( match language: case "cf": - r = lint_policy_file( - snippet_abs_path, origin_path, first_line + 1, snippet_number, prefix + r = lint_policy_file_snippet( + snippet_abs_path, + origin_path, + first_line + 1, + snippet_number, + prefix, + strict=False, ) if r != 0: raise UserError(f"Error when checking '{origin_path}'") @@ -189,6 +195,7 @@ def fn_replace(origin_path, snippet_path, _language, first_line, last_line, inde def fn_autoformat(_origin_path, snippet_path, language, _first_line, _last_line): + assert language in ("cf", "json") match language: case "json": try: @@ -203,6 +210,9 @@ def fn_autoformat(_origin_path, snippet_path, language, _first_line, _last_line) raise UserError(f"Couldn't open '{snippet_path}'") except json.decoder.JSONDecodeError: raise UserError(f"Invalid json in '{snippet_path}'") + case "cf": + # Note: Dead code - Not used for CFEngine policy yet + format_policy_file(snippet_path, 80) def _translate_language(x): @@ -241,10 +251,23 @@ def _process_markdown_code_blocks( origin_paths = sorted(parsed_markdowns["files"].keys()) origin_paths_len = len(origin_paths) + if origin_paths_len == 0: + print("No markdown files found.") + return + + if syntax_check: + # We currently only print the filenames during linting, not formatting + print( + f"Processing code blocks (snippets) inside {origin_paths_len} markdown files:" + ) for origin_paths_i, origin_path in enumerate(origin_paths): percentage = int(100 * (origin_paths_i + 1) / origin_paths_len) - prefix = f"[{origin_paths_i + 1}/{origin_paths_len} ({percentage}%)] " + spaces = " " * (4 - len(str(percentage))) + prefix = f"[{origin_paths_i + 1}/{origin_paths_len}{spaces}({percentage}%)] " offset = 0 + if syntax_check and not parsed_markdowns["files"][origin_path]["code-blocks"]: + print(f"{prefix}SKIP: No code blocks in '{origin_path}'") + continue for i, code_block in enumerate( parsed_markdowns["files"][origin_path]["code-blocks"] ): @@ -259,33 +282,44 @@ def _process_markdown_code_blocks( snippet_path = f"{origin_path}.snippet-{snippet_number}.{language}" flags = code_block["flags"] + first_line = code_block["first_line"] + last_line = code_block["last_line"] if "noextract" in flags or "skip" in flags: # code block was marked to be skipped + if syntax_check: + print( + f"{prefix}SKIP: Snippet {snippet_number} at '{origin_path}:{first_line}' ({language} {' '.join(flags)})" + ) continue if extract: fn_extract( origin_path, snippet_path, language, - code_block["first_line"], - code_block["last_line"], + first_line, + last_line, ) - if syntax_check and "novalidate" not in flags: - try: - fn_check_syntax( - origin_path, - snippet_path, - language, - code_block["first_line"], - code_block["last_line"], - snippet_number, - prefix, + if syntax_check: + if "novalidate" in flags: + print( + f"{prefix}SKIP: Snippet {snippet_number} at '{origin_path}:{first_line}' ({language} {' '.join(flags)})" ) - except Exception as e: - if cleanup: - os.remove(snippet_path) - raise e + else: + try: + fn_check_syntax( + origin_path, + snippet_path, + language, + first_line, + last_line, + snippet_number, + prefix, + ) + except Exception as e: + if cleanup: + os.remove(snippet_path) + raise e if output_check and "noexecute" not in flags: fn_check_output() @@ -295,8 +329,8 @@ def _process_markdown_code_blocks( origin_path, snippet_path, language, - code_block["first_line"], - code_block["last_line"], + first_line, + last_line, ) if replace and "noreplace" not in flags: @@ -304,7 +338,7 @@ def _process_markdown_code_blocks( origin_path, snippet_path, language, - code_block["first_line"], + first_line, code_block["last_line"], code_block["indent"], ) @@ -409,7 +443,7 @@ def check_docs() -> int: Run by the command: cfengine dev lint-docs""" - r = lint_folder(".", strict=False) + r = lint_args(["."], strict=False) if r != 0: return r _process_markdown_code_blocks( diff --git a/src/cfengine_cli/lint.py b/src/cfengine_cli/lint.py index 38882c9..b78988e 100644 --- a/src/cfengine_cli/lint.py +++ b/src/cfengine_cli/lint.py @@ -6,16 +6,36 @@ - cfbs.json (CFEngine Build project files) - *.json (basic JSON syntax checking) +This is performed in 3 steps: +1. Parsing - Read the .cf files and convert them into syntax trees +2. Discovery - Walk the syntax trees and record what is defined (bundles, + bodies, promise types) +3. Linting - Walk the syntax trees again and use linting rules to check for + errors + +By default, linting is strict about bundles and bodies being defined +somewhere in the supplied files / folders. This can be disabled using the +`--strict=no` flag. + Usage: $ cfengine lint +$ cfengine lint ./core/ ./masterfiles/ +$ cfengine lint --strict=no main.cf + +Todos: +- Snippet information should be moved to PolicyFile (instead of state) +- state.start_file(), state.end_file(), and state.get_pretty_filename() + are a bit awkward. Could make iteration nicer. +- JSON checking / parsing could be split up and/or reorganized a bit. """ +from enum import Enum import os import json -import itertools +from typing import Callable, Iterable import tree_sitter_cfengine as tscfengine from dataclasses import dataclass -from tree_sitter import Language, Parser +from tree_sitter import Language, Node, Parser, Tree from cfbs.validate import validate_config from cfbs.cfbs_config import CFBSConfig from cfbs.utils import find @@ -27,143 +47,418 @@ ) +def _qualify(name: str, namespace: str) -> str: + """If name is already qualified (contains ':'), return as-is. Otherwise prepend namespace.""" + assert '"' not in namespace + assert '"' not in name + if ":" in name: + return name + return f"{namespace}:{name}" + + +class Mode(Enum): + """We go through these states in order, and have asserts to check that + expected state is set.""" + + NONE = None + SYNTAX = 1 # Parse policy file and check for errors or empty + DISCOVER = 2 # Find user defined bodies / bundles / promise types + LINT = 3 # Run linting rules on syntax tree and check for errors + + +@dataclass +class Snippet: + """A snippet is a policy file from a markdown code block. + + When we're linting these, we keep additional information about the original + file and location.""" + + original_filename: str + original_line: int # The beginning line number of the snippet + i: int # 1-indexed number of snippet within original file + + +class PolicyFile: + """This class represents a parsed policy file + + The file is parsed once in the constructor, and then the syntax tree is + reused when we iterate over it multiple times. + + We store filename, raw data (bytes), array of lines, and syntax tree/nodes. + This is a but of "duplication", but they are useful for printing nice + linting errors. + + This is intended as a read-only view of the policy file, not to be used for + formatting / changing the policy. + + Might be expanded in the future to include more information such as: + - Whether the file is empty + - Whether the file has syntax errors + - Whether the file uses macros (Macros can indicate we need to be less strict) + - Things defined and referenced in the file (bundles, bodies, promise types, ) + """ + + def __init__(self, filename: str): + self.filename = filename + tree, lines, original_data = _parse_policy_file(filename) + self.tree = tree + self.lines = lines + self.original_data = original_data + + # Flatten tree so it is easier to iterate over: + self.nodes: list[Node] = [] + + def visit(x): + self.nodes.append(x) + return 0 + + _walk_callback(tree.root_node, visit) + + @dataclass -class _State: - block_type: str | None = None # "bundle" | "body" | "promise" | None +class State: + """This class is used to keep track of needed state while linting. + + Not used in parsing, that is handled entirely by tree sitter library. + Used when we are iterating over (walking) nodes in the syntax tree. + + It has 3 different sets of information: + 1. Where we are in policy (policy file, attribute name, promise type etc.) + 2. Things defined in the policy set (bundles, bodies, promise types) + 3. Information needed to print correct linting errors (prefix, snippet information) + """ + + block_keyword: str | None = None # "bundle" | "body" | "promise" | None + block_type: str | None = None + block_name: str | None = None promise_type: str | None = None # "vars" | "files" | "classes" | ... | None attribute_name: str | None = None # "if" | "string" | "slist" | ... | None namespace: str = "default" # "ns" | "default" | ... | + mode: Mode = Mode.NONE + walking: bool = False + strict: bool = True + bundles = {} + bodies = {} + custom_promise_types = {} + policy_file: PolicyFile | None = None + snippet: Snippet | None = None # Could be moved into PolicyFile + prefix: str | None = None + + def print_summary(self) -> None: + """Useful to print relevant information when debugging.""" + print("Bundles") + print(self.bundles) + print("Bodies") + print(self.bodies) + print("Custom promise types") + print(self.custom_promise_types) + + def get_pretty_filename(self) -> str: + """Filename, or code block number and location""" + assert self.policy_file + snippet = self.snippet + if not snippet: + return self.policy_file.filename + return f"Code block {snippet.i} (cf3) at '{snippet.original_filename}:{snippet.original_line}'" + + def get_location(self, line: int, column: int) -> str: + """File location (including line and col) used in error messages and + clickable in the right terminals / IDEs.""" + assert self.policy_file + filename = self.policy_file.filename + snippet = self.snippet + if snippet: + # If we are in a snippet (code block in markdown file), translate + # to original filename and correct line number in that file: + filename = snippet.original_filename + line = snippet.original_line + line + return f"{filename}:{line}:{column}" + + def get_location_extended(self, line: int, column: int) -> str: + """String to put in error messages which specifies where the issue is + and whether it is in a code block.""" + location = self.get_location(line, column) + assert self.policy_file + if not self.snippet: + return f"at {location}" + return f"in code block at {location}" + + def block_string(self) -> str | None: + """Returns for example "body file control" when you are inside body + file control block.""" + if not (self.block_keyword and self.block_type and self.block_name): + return None + + return " ".join((self.block_keyword, self.block_type, self.block_name)) + + def start_file(self, policy_file: PolicyFile): + """Should be called before the first state.navigate() when iterating + over a policy file syntax tree.""" + assert policy_file + assert not self.walking + assert self.mode != Mode.NONE + + self.policy_file = policy_file + self.namespace = "default" + self.walking = True + + def end_file(self) -> None: + """Should be called after the last state.navigate() when iterating + over a policy file syntax tree. + + Note: state.snippet is NOT automatically cleared. Caller needs to make + sure state.snippet is set and cleared appropriately when working + with code blocks. This is because the normal case is to iterate + over the same snippet (file) 2-3 times, so it would be annoying + if you have to reset snippet after each .end_file(). + """ + assert self.walking + assert self.mode != Mode.NONE + + # These should normally be unset automatically (for a valid policy + # file). However, if we are "aborting" in the middle of a policy + # file with syntax errors, we need to clear them. + self.block_keyword = None + self.block_type = None + self.block_name = None + self.promise_type = None + self.attribute_name = None + + self.walking = False + self.policy_file = None + + def add_bundle(self, name: str) -> None: + """This is called during discovery wherever a bundle is defined. + + For example: + bundle agent my_bundle {} + + """ + name = _qualify(name, self.namespace) + # TODO: In the future we will record more information than True, like: + # - Can be a list / dict of all places a bundle with that + # qualified name is defined in cases there are duplicates. + # - Can record the location of each definition + # - Can record the parameters / signature + # - Can record whether the bundle is inside a macro + # - Can have a list of classes and vars defined inside + self.bundles[name] = True + + def add_body(self, name: str) -> None: + """This is called during discovery wherever a body is defined. + + For example: + body contain my_body {} + + Control bundles are a special case, so would not be called for: + body file control {} + """ + name = _qualify(name, self.namespace) + self.bodies[name] = True + + def add_promise_type(self, name: str) -> None: + """This is called during discovery wherever a custom promise type is + defined. + + For example: + promise agent example + { + interpreter => "/usr/bin/python3"; + path => "/var/cfengine/inputs/modules/promises/git.py"; + } + """ + self.custom_promise_types[name] = True + + def navigate(self, node) -> None: + """This function is called whenever we move to a node, to update the + state accordingly. + + For example: + - When we encounter a closing } for a bundle, we want to set + block_keyword from "bundle" to None + """ + assert self.mode != Mode.NONE + assert self.walking + + # Some sanity checks - we want to see that state is correctly updated: + if node.type in ("}", ";", "->", "=>"): + # "Inside" a block + assert self.block_keyword and self.block_type and self.block_name + assert self.block_keyword in ("bundle", "body", "promise") + if node.type in (";", "->", "=>") and self.block_keyword == "bundle": + # "Inside" a non-empty bundle + assert self.promise_type + if node.type in ("->") and self.block_keyword == "bundle": + # Stakeholder arrow means we must be in bundle + assert self.block_keyword == "bundle" + if node.type == "=>": + assert self.attribute_name + + # Beginnings of blocks: + if node.type in ( + "bundle_block_keyword", + "body_block_keyword", + "promise_block_keyword", + ): + self.block_keyword = _text(node) + assert self.block_keyword in ("bundle", "body", "promise") + return + if node.type in ( + "bundle_block_type", + "body_block_type", + "promise_block_type", + ): + self.block_type = _text(node) + assert self.block_type + return + if node.type in ( + "bundle_block_name", + "body_block_name", + "promise_block_name", + ): + self.block_name = _text(node) + assert self.block_name + return - def update(self, node): - """Updates and returns the state that should apply to the children of `node`.""" - if node.type == "}": + # Update namespace inside body file control: + if ( + self.block_string() == "body file control" + and self.attribute_name == "namespace" + and node.type == "quoted_string" + ): + self.namespace = _text(node)[1:-1] + return + + # New promise type (bundle section) inside a bundle: + if node.type == "promise_guard": + self.promise_type = _text(node)[:-1] # strip trailing ':' + return + + if node.type == "attribute_name": + self.attribute_name = _text(node) + return + + # Attributes always end with ; in all 3 block types + if node.type == ";": + self.attribute_name = None + return + + # Clear things when ending a top level block: + if node.type == "}" and node.parent.type != "list": + assert self.attribute_name is None # Should already be cleared by ; assert node.parent assert node.parent.type in [ "bundle_block_body", "promise_block_body", "body_block_body", - "list", ] - if node.parent.type != "list": - # We just ended a block - self.block_type = None - self.promise_type = None - self.attribute_name = None - return - if node.type == ";": - self.attribute_name = None + self.block_keyword = None + self.block_type = None + self.block_name = None + self.promise_type = None return - if node.type == "bundle_block": - self.block_type = "bundle" - return - if node.type == "body_block": - self.block_type = "body" - return - if node.type == "promise_block": - self.block_type = "promise" - return - if node.type == "bundle_section": - # A bundle_section is always: promise_guard, [promises], [class_guarded_promises...] - # The promise_guard is guaranteed to exist by the grammar - guard = next((c for c in node.children if c.type == "promise_guard"), None) - if guard is None: # Should never happen - print("ERROR: Bundle section without a promise guard") - return - - self.promise_type = _text(guard)[:-1] # strip trailing ':' - return - if node.type == "attribute": - for child in node.children: - if child.type == "attribute_name": - self.attribute_name = _text(child) - if self.attribute_name == "namespace": - self.namespace = _text(child.next_named_sibling).strip("\"'") - return - return - - @staticmethod - def qualify(name: str, namespace: str) -> str: - """If name is already qualified (contains ':'), return as-is. Otherwise prepend namespace.""" - return name if ":" in name else f"{namespace}:{name}" - - -def lint_cfbs_json(filename) -> int: - assert os.path.isfile(filename) - assert filename.endswith("cfbs.json") - config = CFBSConfig.get_instance(filename=filename, non_interactive=True) - r = validate_config(config) - if r == 0: - print(f"PASS: {filename}") - return 0 - print(f"FAIL: {filename}") - return r +def _check_syntax(policy_file: PolicyFile, state: State) -> int: + """Iterate over a syntax tree and print errors if it is empty or has syntax + errors. + Notably, printing syntax errors _does not happen during parsing_. -def lint_json(filename) -> int: - assert os.path.isfile(filename) + Tree sitter has already fully parsed the policy, we iterate over the result + and see if it has inserted any "ERROR" nodes. - with open(filename, "r") as f: - data = f.read() + Stops at first error. Returns number of errors, always 0 or 1 in this case. + """ + assert state.mode == Mode.SYNTAX + filename = policy_file.filename + lines = policy_file.lines + if not policy_file.tree.root_node.children: + snippet = state.snippet + if snippet: + print( + f"Error: Empty policy snippet {snippet.i} at '{snippet.original_filename}:{snippet.original_line}'" + ) + else: + print(f"Error: Empty policy file '{filename}'") + return 1 - try: - data = json.loads(data) - except: - print(f"FAIL: {filename} (invalid JSON)") + assert policy_file.tree.root_node.type == "source_file" + + state.start_file(policy_file) + for node in policy_file.nodes: + state.navigate(node) + _discover_node(node, state) + if node.type != "ERROR": + continue + line = node.range.start_point[0] + 1 + column = node.range.start_point[1] + 1 + _highlight_range(node, lines) + location = state.get_location_extended(line, column) + print(f"Error: Syntax error {location}") + state.end_file() return 1 - print(f"PASS: {filename}") + state.end_file() return 0 -def _highlight_range(node, lines): - line = node.range.start_point[0] + 1 - column = node.range.start_point[1] +def _discover_node(node: Node, state: State) -> int: + """Look at a single node and if it's the name of a bundle / body / promise + block, add it to state. - length = len(lines[line - 1]) - column - if node.range.start_point[0] == node.range.end_point[0]: - # Starts and ends on same line: - length = node.range.end_point[1] - node.range.start_point[1] - assert length >= 1 - print("") - if line >= 2: - print(lines[line - 2]) - print(lines[line - 1]) - marker = "^" - if length > 2: - marker += "-" * (length - 2) - if length > 1: - marker += "^" - print(" " * column + marker) + control bodies are skipped. + Returns number of errors, always 0 in this case.""" -def _text(node): - return node.text.decode() + # Define bodies: + if node.type == "body_block_name": + name = _text(node) + if name == "control": + return 0 # No need to define control blocks + state.add_body(name) + return 0 + + # Define bundles: + if node.type == "bundle_block_name": + name = _text(node) + state.add_bundle(name) + return 0 + + # Define custom promise types: + if node.type == "promise_block_name": + state.add_promise_type(_text(node)) + return 0 + return 0 -def _walk_generic(filename, lines, node, visitor): - visitor(node) - for node in node.children: - _walk_generic(filename, lines, node, visitor) +def _discover(policy_file: PolicyFile, state: State) -> int: + """Discover all user defined bodies / bundles / promise types in a policy + file and adds them to state. -def _find_node_type(filename, lines, node, node_type): - matches = [] - visitor = lambda x: matches.extend([x] if x.type == node_type else []) - _walk_generic(filename, lines, node, visitor) - return matches + Returns number of errors, always 0 in this case. + """ + assert state.mode == Mode.DISCOVER + state.start_file(policy_file) + for node in policy_file.nodes: + state.navigate(node) + _discover_node(node, state) + state.end_file() + return 0 -def _node_checks(filename, lines, node, user_definition, strict, state: _State): +def _lint_node(node: Node, policy_file: PolicyFile, state: State) -> int: """Checks we run on each node in the syntax tree, utilizes state for checks which require context.""" + + lines = policy_file.lines line = node.range.start_point[0] + 1 column = node.range.start_point[1] + 1 + location = state.get_location_extended(line, column) + if node.type == "attribute_name" and _text(node) == "ifvarclass": _highlight_range(node, lines) - print( - f"Deprecation: Use 'if' instead of 'ifvarclass' at {filename}:{line}:{column}" - ) + print(f"Deprecation: Use 'if' instead of 'ifvarclass' {location}") return 1 if node.type == "promise_guard": assert _text(node) and len(_text(node)) > 1 and _text(node)[-1] == ":" @@ -171,154 +466,253 @@ def _node_checks(filename, lines, node, user_definition, strict, state: _State): if promise_type in DEPRECATED_PROMISE_TYPES: _highlight_range(node, lines) print( - f"Deprecation: Promise type '{promise_type}' is deprecated at {filename}:{line}:{column}" + f"Deprecation: Promise type '{promise_type}' is deprecated {location}" ) return 1 - if strict and ( - ( - promise_type - not in BUILTIN_PROMISE_TYPES.union( - user_definition.get("custom_promise_types", set()) - ) - ) + if ( + state.strict + and promise_type not in BUILTIN_PROMISE_TYPES + and promise_type not in state.custom_promise_types ): _highlight_range(node, lines) - print( - f"Error: Undefined promise type '{promise_type}' at {filename}:{line}:{column}" - ) - return 1 - if node.type == "bundle_block_name": - if _text(node) != _text(node).lower(): - _highlight_range(node, lines) - print( - f"Convention: Bundle name should be lowercase at {filename}:{line}:{column}" - ) - return 1 - if node.type == "promise_block_name": - if _text(node) != _text(node).lower(): - _highlight_range(node, lines) - print( - f"Convention: Promise type should be lowercase at {filename}:{line}:{column}" - ) - return 1 - if node.type == "bundle_block_type": - if _text(node) not in ALLOWED_BUNDLE_TYPES: - _highlight_range(node, lines) - print( - f"Error: Bundle type must be one of ({', '.join(ALLOWED_BUNDLE_TYPES)}), not '{_text(node)}' at {filename}:{line}:{column}" - ) + print(f"Error: Undefined promise type '{promise_type}' {location}") return 1 + if node.type == "bundle_block_name" and _text(node) != _text(node).lower(): + _highlight_range(node, lines) + print(f"Convention: Bundle name should be lowercase {location}") + return 1 + if node.type == "promise_block_name" and _text(node) != _text(node).lower(): + _highlight_range(node, lines) + print(f"Convention: Promise type should be lowercase {location}") + return 1 + if node.type == "bundle_block_type" and _text(node) not in ALLOWED_BUNDLE_TYPES: + _highlight_range(node, lines) + print( + f"Error: Bundle type must be one of ({', '.join(ALLOWED_BUNDLE_TYPES)}), not '{_text(node)}' {location}" + ) + return 1 if node.type == "calling_identifier": + name = _text(node) + qualified_name = _qualify(name, state.namespace) if ( - strict - and state.qualify(_text(node), state.namespace) - in user_definition.get("all_bundle_names", set()) - and state.promise_type in user_definition.get("custom_promise_types", set()) + state.strict + and qualified_name in state.bundles + and state.promise_type in state.custom_promise_types ): _highlight_range(node, lines) print( - f"Error: Call to bundle '{_text(node)}' inside custom promise: '{state.promise_type}' at {filename}:{line}:{column}" + f"Error: Call to bundle '{name}' inside custom promise: '{state.promise_type}' {location}" ) return 1 - if strict and ( - state.qualify(_text(node), state.namespace) - not in set.union( - user_definition.get("all_bundle_names", set()), - user_definition.get("all_body_names", set()), - ) - and _text(node) not in BUILTIN_FUNCTIONS + if state.strict and ( + qualified_name not in state.bundles + and qualified_name not in state.bodies + and name not in BUILTIN_FUNCTIONS ): _highlight_range(node, lines) print( - f"Error: Call to unknown function / bundle / body '{_text(node)}' at at {filename}:{line}:{column}" + f"Error: Call to unknown function / bundle / body '{name}' {location}" ) return 1 return 0 -def _stateful_walk( - filename, lines, node, user_definition, strict, state: _State | None = None -) -> int: - if state is None: - state = _State() +def _pass_fail_filename(filename: str, errors: int) -> str: + """String to print whether a file passed or failed, includes number of + errors in case of failure.""" + if errors == 0: + return f"PASS: {filename}" + error_string = f"{errors} error{'s' if errors > 1 else ''}" + return f"FAIL: {filename} ({error_string})" - errors = _node_checks(filename, lines, node, user_definition, strict, state) - state.update(node) - for child in node.children: - errors += _stateful_walk(filename, lines, child, user_definition, strict, state) +def _pass_fail_state(state: State, errors: int) -> str: + """String to print whether a file passed or failed. + + Uses state to get appropriate information for code blocks if necessary. + Must be called before state.end_file(). + """ + pretty_filename = state.get_pretty_filename() + return _pass_fail_filename(pretty_filename, errors) + + +def _lint(policy_file: PolicyFile, state: State) -> int: + """Run linting rules (checks) on nodes in a policy file syntax tree.""" + assert state.mode == Mode.LINT + errors = 0 + state.start_file(policy_file) + for node in policy_file.nodes: + state.navigate(node) + errors += _lint_node(node, policy_file, state) + message = _pass_fail_state(state, errors) + state.end_file() + if state.prefix: + print(state.prefix, end="") + print(message) return errors -def _walk(filename, lines, node, user_definition=None, strict=True) -> int: - if user_definition is None: - user_definition = {} +def _find_policy_files(args: Iterable[str]) -> Iterable[str]: + """Takes an iterator of paths to files / folders - error_nodes = _find_node_type(filename, lines, node, "ERROR") - if error_nodes: - for node in error_nodes: - line = node.range.start_point[0] + 1 - column = node.range.start_point[1] + 1 - _highlight_range(node, lines) - print(f"Error: Syntax error at {filename}:{line}:{column}") - return len(error_nodes) + Returns an iterator of CFEngine policy file paths (strings). + """ + for arg in args: + if os.path.isdir(arg): + while arg.endswith(("/.", "/")): + arg = arg[0:-1] + for result in find(arg, extension=".cf"): + yield result + elif arg.endswith(".cf"): + yield arg + + +def _find_json_files(args: Iterable[str]) -> Iterable[str]: + """Takes an iterator of paths to files / folders + + Returns an iterator of JSON file paths (strings). + """ + for arg in args: + if os.path.isdir(arg): + for result in find(arg, extension=".json"): + yield result + elif arg.endswith(".json"): + yield arg + + +def filter_filenames(filenames: Iterable[str], args: list[str]) -> Iterable[str]: + """Filter filenames to avoid linting cfbs generated files and hidden files. + + TODO: We should better respect the user's args if they do: + cfengine lint ./out/masterfies/ + cfengine lint ./somepath/.somehidden/policy.cf + """ + + for filename in filenames: + if filename in args: + # The filename was actually one of the args, include it regardless: + yield filename + continue + # Skip cfbs generated files by default: + if "/out/" in filename or "/." in filename: + continue + if filename.startswith("out/"): + continue + # Skip + if filename.startswith(".") and not filename.startswith("./"): + continue + yield filename + + +def _lint_main(args: list[str], strict: bool, state=None) -> int: + """This is the main function used for linting, it does all the steps on all + the arguments (files / folders). + + Summarized, it does: + 1. Find all filenames. + 2. Syntax check / lint JSON files. + 3. Parse policy files into syntax trees and check for syntax errors. + 4. Discover user defined bundles / bodies / promise types in syntax trees. + 5. Lint policy files (syntax trees) printing errors based on checks. + + If there are syntax errors, it stops early, the last 2 steps are not performed. + + Returns number of errors.""" + errors = 0 + + if state is None: + state = State() + state.strict = strict + state.mode = Mode.SYNTAX + + json_filenames = filter_filenames(_find_json_files(args), args=args) + policy_filenames = filter_filenames(_find_policy_files(args), args=args) + # TODO: JSON checking could be split into parse + # and additional checks for cfbs.json. + # The second step could happen after discovery for consistency. + for file in json_filenames: + errors += _lint_json_selector(file) + + policy_files = [] + for filename in policy_filenames: + policy_file = PolicyFile(filename) + r = _check_syntax(policy_file, state) + errors += r + if r != 0: + state.start_file(policy_file) + print(_pass_fail_state(state, r)) + state.end_file() + continue + policy_files.append(policy_file) + if errors != 0: + return errors + state.mode = Mode.DISCOVER + + for policy_file in policy_files: + errors += _discover(policy_file, state) + + state.mode = Mode.LINT + + for policy_file in policy_files: + errors += _lint(policy_file, state) + + return errors + + +def _highlight_range(node: Node, lines: list[str]) -> None: + """Highlight which line and which part of the line is problematic.""" line = node.range.start_point[0] + 1 - column = node.range.start_point[1] + 1 + column = node.range.start_point[1] - state = _State() - ret = _stateful_walk(filename, lines, node, user_definition, strict, state=state) - state = _State() # Clear state - return ret + length = len(lines[line - 1]) - column + if node.range.start_point[0] == node.range.end_point[0]: + # Starts and ends on same line: + length = node.range.end_point[1] - node.range.start_point[1] + assert length >= 1 + print("") -def _parse_user_definition(filename, lines, root_node): - ns = "default" - promise_blocks = set() - bundle_blocks = set() - body_blocks = set() + # Print previous line if any, for context + if line >= 2: + print(lines[line - 2]) - for child in root_node.children: - if child.type == "body_block": - name_node = next( - (c for c in child.named_children if c.type == "body_block_name"), - None, - ) - ns_attr = next( - ( - c - for c in _find_node_type(filename, lines, child, "attribute_name") - if _text(c) == "namespace" - ), - None, - ) - if ns_attr is not None: - ns = _text(ns_attr.next_named_sibling).strip("\"'") - elif name_node is not None: - body_blocks.add(_State.qualify(_text(name_node), ns)) - elif child.type == "bundle_block": - name_node = next( - (c for c in child.named_children if c.type == "bundle_block_name"), - None, - ) - if name_node is not None: - bundle_blocks.add(_State.qualify(_text(name_node), ns)) - elif child.type == "promise_block": - name_node = next( - (c for c in child.named_children if c.type == "promise_block_name"), - None, - ) - if name_node is not None: - promise_blocks.add(_text(name_node)) + # Print the problematic line: + print(lines[line - 1]) - return { - "custom_promise_types": promise_blocks, - "all_bundle_names": bundle_blocks, - "all_body_names": body_blocks, - } + # Print the arrows on the next line pointing at the problematic part: + marker = "^" + if length > 2: + marker += "-" * (length - 2) + if length > 1: + marker += "^" + print(" " * column + marker) -def _parse_policy_file(filename): +def _text(node: Node) -> str: + """Get the string / text of a syntax node, i.e. what was actually written + in the policy file.""" + assert node.text + return node.text.decode() + + +def _walk_callback(node: Node, callback: Callable[[Node], int]) -> int: + """Recursively walk a syntax tree, calling the callback on each node.""" + assert node + assert callback + + errors = 0 + errors += callback(node) + for child in node.children: + _walk_callback(child, callback) + return errors + + +def _parse_policy_file(filename: str) -> tuple[Tree, list[str], bytes]: + """Parse a policy file into a syntax tree using tree sitter. + + This function is used by PolicyFile constructor, in most cases it will be + to call Policyfile(filename) instead of this function.""" assert os.path.isfile(filename) PY_LANGUAGE = Language(tscfengine.language()) parser = Parser(PY_LANGUAGE) @@ -331,133 +725,77 @@ def _parse_policy_file(filename): return tree, lines, original_data -def lint_policy_file( - filename, - original_filename=None, - original_line=None, - snippet=None, - prefix=None, - user_definition=None, - strict=True, +def _lint_json_selector(file: str) -> int: + """Lint a JSON file using the cfbs function for cfbs.json and the generic + option otherwise.""" + assert os.path.isfile(file) + if file.endswith("/cfbs.json"): + return lint_cfbs_json(file) + assert file.endswith(".json") + return lint_json(file) # TODO don't call the public interface + + +# Interface: These are the functions we want to be called from outside +# They create State() and should not be called recursively inside lint.py + + +def lint_single_file(file: str, strict: bool = True) -> int: + """Lint a single file""" + return _lint_main([file], strict) + + +def lint_args(args: Iterable[str], strict: bool = True) -> int: + """Lint a list of args (files / folders)""" + return _lint_main(list(args), strict) + + +def lint_policy_file_snippet( + filename: str, + original_filename: str, + original_line: int, + snippet: int, + prefix: str, + strict: bool = True, ): - assert original_filename is None or type(original_filename) is str - assert original_line is None or type(original_line) is int - assert snippet is None or type(snippet) is int - if ( - original_filename is not None - or original_line is not None - or snippet is not None - ): - assert original_filename and os.path.isfile(original_filename) - assert original_line and original_line > 0 - assert snippet and snippet > 0 + """Lint a policy file snippet (extracted from a markdown code block).""" + assert prefix + assert original_filename and os.path.isfile(original_filename) + assert original_line and original_line > 0 + assert snippet and snippet > 0 assert os.path.isfile(filename) assert filename.endswith((".cf", ".cfengine3", ".cf3", ".cf.sub")) - if user_definition is None: - user_definition = {} - - tree, lines, original_data = _parse_policy_file(filename) - root_node = tree.root_node - if root_node.type != "source_file": - if snippet: - assert original_filename and original_line - print( - f"Error: Failed to parse policy snippet {snippet} at '{original_filename}:{original_line}'" - ) - else: - print(f" Empty policy file '{filename}'") - print(" Is this valid CFEngine policy?") - print("") - lines = original_data.decode().split("\n") - if not len(lines) <= 5: - lines = lines[:4] + ["..."] - for line in lines: - print(" " + line) - print("") - return 1 - assert root_node.type == "source_file" - errors = 0 - if not root_node.children: - if snippet: - assert original_filename and original_line - print( - f"Error: Empty policy snippet {snippet} at '{original_filename}:{original_line}'" - ) - else: - print(f"Error: Empty policy file '{filename}'") - errors += 1 - errors += _walk(filename, lines, root_node, user_definition, strict) - if prefix: - print(prefix, end="") - if errors == 0: - if snippet: - assert original_filename and original_line - print( - f"PASS: Snippet {snippet} at '{original_filename}:{original_line}' (cf3)" - ) - else: - print(f"PASS: {filename}") - return 0 - - if snippet: - assert original_filename and original_line - print(f"FAIL: Snippet {snippet} at '{original_filename}:{original_line}' (cf3)") - else: - print(f"FAIL: {filename} ({errors} error{'s' if errors > 0 else ''})") - return errors + state = State() + state.strict = strict + state.snippet = Snippet(original_filename, original_line, snippet) + state.prefix = prefix + return _lint_main([filename], strict, state) -def lint_folder(folder, strict=True): - errors = 0 - policy_files = [] - while folder.endswith(("/.", "/")): - folder = folder[0:-1] - for filename in itertools.chain( - find(folder, extension=".json"), find(folder, extension=".cf") - ): - if filename.startswith(("./.", "./out/", folder + "/.", folder + "/out/")): - continue - if filename.startswith(".") and not filename.startswith("./"): - continue +def lint_cfbs_json(filename: str) -> int: + """Parse a cfbs.json file, using the code from cfbs.""" + assert os.path.isfile(filename) + assert filename.endswith("cfbs.json") - if filename.endswith((".cf", ".cfengine3", ".cf3", ".cf.sub")): - policy_files.append(filename) - else: - errors += lint_single_file(filename) - - user_definition = {} - - # First pass: Gather custom types - for filename in policy_files if strict else []: - tree, lines, _ = _parse_policy_file(filename) - if tree.root_node.type == "source_file": - for key, val in _parse_user_definition( - filename, lines, tree.root_node - ).items(): - user_definition[key] = user_definition.get(key, set()).union(val) - - # Second pass: lint all policy files - for filename in policy_files: - errors += lint_policy_file( - filename, user_definition=user_definition, strict=strict - ) - return errors + config = CFBSConfig.get_instance(filename=filename, non_interactive=True) + r = validate_config(config) + print(_pass_fail_filename(filename, r)) + return r -def lint_single_file(file, user_definition=None, strict=True): - assert os.path.isfile(file) - if file.endswith("/cfbs.json"): - return lint_cfbs_json(file) - if file.endswith(".json"): - return lint_json(file) - assert file.endswith(".cf") - return lint_policy_file(file, user_definition=user_definition, strict=strict) +def lint_json(filename: str) -> int: + """Lint a JSON file, essentially just checking that it parses + successfully.""" + assert os.path.isfile(filename) -def lint_single_arg(arg, strict=True): - if os.path.isdir(arg): - return lint_folder(arg, strict) - assert os.path.isfile(arg) + with open(filename, "r") as f: + data = f.read() + r = 0 + try: + data = json.loads(data) + except: + r = 1 - return lint_single_file(arg, strict=strict) + print(_pass_fail_filename(filename, r)) + return r