diff --git a/.github/workflows/csv_linter.yml b/.github/workflows/csv_linter.yml index f07553d..df217ad 100644 --- a/.github/workflows/csv_linter.yml +++ b/.github/workflows/csv_linter.yml @@ -9,31 +9,69 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 + - name: Set up Python uses: actions/setup-python@v2 with: - python-version: 3.8 + python-version: '3.8' + - name: Install dependencies run: | + python -m pip install --upgrade pip python -m pip install csvkit - - - name: Lint CSV files - run: find . -name "*.csv" -exec csvclean -n -u 2 {} + - env: - CI: true - - name: Check for errors - if: always() + + - name: Validate CSV structure run: | - if ! grep -q "No errors." <(find . -name "*.csv" -exec csvclean -n -u 2 {} + | grep -v "^$"); then - echo "Errors were found in one or more CSV files." + echo "Checking CSV structure..." + if ! csvclean -n prompts.csv 2>&1 | tee /tmp/csv_errors.log; then + echo "::error::CSV validation failed" + cat /tmp/csv_errors.log exit 1 fi + + - name: Check CSV format + run: | + echo "Checking CSV format..." + if ! python -c ' + import csv + with open("prompts.csv", "r", encoding="utf-8") as f: + reader = csv.reader(f) + headers = next(reader) + if headers != ["act", "prompt"]: + print("Error: CSV headers must be exactly [act, prompt]") + exit(1) + for row_num, row in enumerate(reader, 2): + if len(row) != 2: + print(f"Error: Row {row_num} has {len(row)} columns, expected 2") + exit(1) + if not row[0] or not row[1]: + print(f"Error: Row {row_num} has empty values") + exit(1) + '; then + echo "::error::CSV format check failed" + exit 1 + fi + - name: Check Trailing Whitespaces run: | - # Check if the special file contains any trailing whitespaces - if grep -q "[[:space:]]$" ./prompts.csv; then - echo "ERROR: Found trailing whitespaces in prompts.csv" + echo "Checking for trailing whitespaces..." + if grep -q "[[:space:]]$" prompts.csv; then + echo "::error::Found trailing whitespaces in prompts.csv" + grep -n "[[:space:]]$" prompts.csv | while read -r line; do + echo "Line with trailing whitespace: $line" + done + exit 1 + fi + echo "No trailing whitespaces found" + + - name: Check for UTF-8 BOM and line endings + run: | + echo "Checking for UTF-8 BOM and line endings..." + if file prompts.csv | grep -q "with BOM"; then + echo "::error::File contains UTF-8 BOM marker" + exit 1 + fi + if file prompts.csv | grep -q "CRLF"; then + echo "::error::File contains Windows-style (CRLF) line endings" exit 1 - else - echo "No trailing whitespaces found in prompts.csv" fi \ No newline at end of file