Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Local credentials; pipeline.yaml mounts ./.token into the analyzer container.
.token
.env

# Working directories created by pipeline.sh (.local/raw_data, .local/metrics).
.local

# Presumably scratch files left behind by the aider tool (TODO confirm).
.aider*
7 changes: 7 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# pre-commit hook configuration for this repository.
repos:
  # Secret scanning with Yelp's detect-secrets, pinned to a release tag.
  - repo: https://github.com/Yelp/detect-secrets
    rev: v1.5.0
    hooks:
      - id: detect-secrets
        # Check findings against the committed baseline file.
        args:
          - '--baseline'
          - '.secrets.baseline'
        # .secrets.baseline lists the gibberish filter, which needs this
        # extra package in the hook's environment.
        additional_dependencies:
          - 'gibberish-detector'
131 changes: 131 additions & 0 deletions .secrets.baseline
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
{
"version": "1.5.0",
"plugins_used": [
{
"name": "ArtifactoryDetector"
},
{
"name": "AWSKeyDetector"
},
{
"name": "AzureStorageKeyDetector"
},
{
"name": "Base64HighEntropyString",
"limit": 4.5
},
{
"name": "BasicAuthDetector"
},
{
"name": "CloudantDetector"
},
{
"name": "DiscordBotTokenDetector"
},
{
"name": "GitHubTokenDetector"
},
{
"name": "GitLabTokenDetector"
},
{
"name": "HexHighEntropyString",
"limit": 3.0
},
{
"name": "IbmCloudIamDetector"
},
{
"name": "IbmCosHmacDetector"
},
{
"name": "IPPublicDetector"
},
{
"name": "JwtTokenDetector"
},
{
"name": "KeywordDetector",
"keyword_exclude": ""
},
{
"name": "MailchimpDetector"
},
{
"name": "NpmDetector"
},
{
"name": "OpenAIDetector"
},
{
"name": "PrivateKeyDetector"
},
{
"name": "PypiTokenDetector"
},
{
"name": "SendGridDetector"
},
{
"name": "SlackDetector"
},
{
"name": "SoftlayerDetector"
},
{
"name": "SquareOAuthDetector"
},
{
"name": "StripeDetector"
},
{
"name": "TelegramBotTokenDetector"
},
{
"name": "TwilioKeyDetector"
}
],
"filters_used": [
{
"path": "detect_secrets.filters.allowlist.is_line_allowlisted"
},
{
"path": "detect_secrets.filters.common.is_ignored_due_to_verification_policies",
"min_level": 2
},
{
"path": "detect_secrets.filters.gibberish.should_exclude_secret",
"limit": 3.7
},
{
"path": "detect_secrets.filters.heuristic.is_indirect_reference"
},
{
"path": "detect_secrets.filters.heuristic.is_likely_id_string"
},
{
"path": "detect_secrets.filters.heuristic.is_lock_file"
},
{
"path": "detect_secrets.filters.heuristic.is_not_alphanumeric_string"
},
{
"path": "detect_secrets.filters.heuristic.is_potential_uuid"
},
{
"path": "detect_secrets.filters.heuristic.is_prefixed_with_dollar_sign"
},
{
"path": "detect_secrets.filters.heuristic.is_sequential_string"
},
{
"path": "detect_secrets.filters.heuristic.is_swagger_file"
},
{
"path": "detect_secrets.filters.heuristic.is_templated_secret"
}
],
"results": {},
"generated_at": "2025-09-23T04:15:11Z"
}
11 changes: 11 additions & 0 deletions pipeline.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/bin/bash
#
# Runs the pod described in pipeline.yaml with podman and then removes it.
#
# Exit status: 0 when both the run and the teardown succeed. If
# `podman kube play` fails, its status is returned (the teardown is still
# attempted). If only the teardown fails, the teardown's status is returned.

set -euxo pipefail

# pipeline.yaml and the .local/ directories are addressed by relative path,
# so always operate from the directory that contains this script.
cd "$(dirname "${BASH_SOURCE[0]}")"

# Host directories that pipeline.yaml mounts as raw-data-volume and
# metrics-volume (both declared with hostPath type Directory).
mkdir -p .local/raw_data .local/metrics

# Grant write access to "other" users; presumably needed so container
# processes running under a different UID can write there (TODO confirm).
chmod o+w .local/raw_data .local/metrics

# Record the status instead of letting `set -e` abort here; otherwise a
# failed run would skip the teardown below and leave the pod behind.
play_rc=0
podman kube play pipeline.yaml || play_rc=$?

# Always tear the pod down, whether or not the run succeeded.
down_rc=0
podman kube down pipeline.yaml || down_rc=$?

# Report the run's failure first; fall back to the teardown's status.
if [ "$play_rc" -ne 0 ]; then
  exit "$play_rc"
fi
exit "$down_rc"
8 changes: 8 additions & 0 deletions pipeline.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- Make the JSON extension available for the read_json_auto calls below.
INSTALL json;
LOAD json;

-- Load every JSON Lines file under /raw_data into its own raw_* table.
-- CREATE OR REPLACE overwrites any table of the same name that already
-- exists in the database.
CREATE OR REPLACE TABLE raw_commits AS
    SELECT *
    FROM read_json_auto('/raw_data/commits.jsonl');

CREATE OR REPLACE TABLE raw_license AS
    SELECT *
    FROM read_json_auto('/raw_data/license.jsonl');

CREATE OR REPLACE TABLE raw_contributors AS
    SELECT *
    FROM read_json_auto('/raw_data/contributors.jsonl');

CREATE OR REPLACE TABLE raw_releases AS
    SELECT *
    FROM read_json_auto('/raw_data/releases.jsonl');

CREATE OR REPLACE TABLE raw_issues AS
    SELECT *
    FROM read_json_auto('/raw_data/issues.jsonl');

CREATE OR REPLACE TABLE raw_root_md_files AS
    SELECT *
    FROM read_json_auto('/raw_data/root_md_files.jsonl');
53 changes: 53 additions & 0 deletions pipeline.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
---
# Pod started by pipeline.sh via `podman kube play`. The two init containers
# are the actual pipeline steps; the regular container is a placeholder.
apiVersion: v1
kind: Pod
metadata:
  name: project-health-pipeline
spec:
  initContainers:
    # Step 1: the analyzer. It is given the token file path through
    # GITHUB_TOKEN_FILE and has the raw-data volume mounted at /app/output
    # (presumably where it writes the *.jsonl files read by pipeline.sql).
    - name: health-analyzer
      image: codeberg.org/0xf1e/project-health-analyzer:latest
      env:
        - name: GITHUB_TOKEN_FILE
          value: /app/token.txt
      volumeMounts:
        - name: raw-data-volume
          mountPath: /app/output
        - name: token-volume
          mountPath: /app/token.txt
          # The token is an input only; keep the host file unmodifiable.
          readOnly: true
      resources: {}
    # Step 2: runs pipeline.sql against /metrics/project_health.db, reading
    # the raw data from the same host directory mounted at /raw_data.
    - name: duckdb-importer
      image: docker.io/duckdb/duckdb
      command:
        - duckdb
        - /metrics/project_health.db
        - -f
        - /app/pipeline.sql
      volumeMounts:
        - name: raw-data-volume
          mountPath: /raw_data
        - name: metrics-volume
          mountPath: /metrics
        - name: sql-volume
          mountPath: /app/pipeline.sql
          # The script is only read via `duckdb -f`.
          readOnly: true
      resources: {}
  containers:
    # Placeholder container. The image name is fully qualified, like the
    # other images here, so it does not rely on short-name resolution.
    - name: todo
      image: docker.io/library/nginx:latest
  restartPolicy: Never
  volumes:
    # Host paths are relative; pipeline.sh creates the two directories
    # before starting the pod.
    - name: raw-data-volume
      hostPath:
        path: ./.local/raw_data
        type: Directory
    - name: metrics-volume
      hostPath:
        path: ./.local/metrics
        type: Directory
    - name: token-volume
      hostPath:
        path: ./.token
        type: File
    - name: sql-volume
      hostPath:
        path: ./pipeline.sql
        type: File