Skip to content

Commit 1436fb2

Browse files
committed
Merge branch 'master' into david-leifker-patch-3
2 parents 5b31839 + d637bf6 commit 1436fb2

File tree

560 files changed

+80677
-5857
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

560 files changed

+80677
-5857
lines changed
Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
#!/usr/bin/env python3
2+
"""Validate that gradle.lockfile files are up-to-date with current dependencies.
3+
4+
This script uses Gradle's built-in dependency resolution to regenerate lockfiles
5+
and checks if they differ from the committed versions.
6+
"""
7+
8+
import subprocess
9+
import sys
10+
import os
11+
12+
13+
def run_command(cmd: list[str], check: bool = True) -> subprocess.CompletedProcess:
    """Execute ``cmd`` as a subprocess and hand back the completed process.

    Args:
        cmd: Program and arguments as an argv list; no shell is involved.
        check: When true, a non-zero exit status raises
            ``subprocess.CalledProcessError``.

    Returns:
        The ``CompletedProcess`` with stdout and stderr captured as text.
    """
    options = {"capture_output": True, "text": True, "check": check}
    return subprocess.run(cmd, **options)
21+
22+
23+
def check_for_changes_in_gradle_files() -> bool:
    """Report whether any uncommitted change touches a Gradle-related file.

    The names listed by ``git diff --name-only HEAD`` and
    ``git diff --name-only --cached`` are combined. A path counts as
    Gradle-related when it ends with ``build.gradle``, ``build.gradle.kts`` or
    ``gradle.lockfile``, or contains ``gradle.properties`` or
    ``gradle/wrapper``.

    Returns:
        True if at least one such path is listed, or if either git command
        fails (so the caller runs the full check to be safe). False when
        neither listing contains a Gradle-related path, which includes the
        case where both listings are empty.
    """
    gradle_suffixes = ("build.gradle", "build.gradle.kts", "gradle.lockfile")
    gradle_fragments = ("gradle.properties", "gradle/wrapper")

    try:
        changed_files: set[str] = set()
        # First everything that differs from HEAD, then what is in the index.
        for diff_args in (["HEAD"], ["--cached"]):
            listing = run_command(
                ["git", "diff", "--name-only", *diff_args]
            ).stdout.strip()
            if listing:
                changed_files.update(listing.split("\n"))
    except subprocess.CalledProcessError:
        # If we can't determine what changed, assume we should check.
        return True

    return any(
        path.endswith(gradle_suffixes)
        or any(fragment in path for fragment in gradle_fragments)
        for path in changed_files
    )
49+
50+
51+
def regenerate_lockfiles() -> tuple[bool, str]:
    """Regenerate all lockfiles using Gradle.

    Runs ``./gradlew resolveAndLockAll --write-locks`` with the
    ``generateGitPropertiesGlobal`` task excluded, rewriting the lockfiles in
    the working tree.

    Returns:
        Tuple of (success, error_message). ``error_message`` is empty on
        success; on failure it carries Gradle's output or the launch error.
    """
    # Build the command once so the logged line always matches what is run.
    cmd = [
        "./gradlew",
        "resolveAndLockAll",
        "--write-locks",
        "-x",
        "generateGitPropertiesGlobal",
    ]
    print("Regenerating lockfiles to verify they are up-to-date...")
    print(f"Running: {' '.join(cmd)}")

    try:
        result = run_command(cmd, check=False)
    except Exception as e:
        # Deliberately broad: any failure to launch Gradle or read its output
        # is reported to the caller as an error message, never swallowed.
        return False, f"Error running Gradle: {e}"

    if result.returncode != 0:
        # Fall back to stdout so the message is not empty when Gradle wrote
        # nothing to stderr.
        details = result.stderr if result.stderr.strip() else result.stdout
        return False, f"Failed to regenerate lockfiles:\n{details}"

    return True, ""
72+
73+
74+
def check_for_lockfile_diffs() -> tuple[bool, list[str]]:
    """Check if any lockfiles have differences after regeneration.

    Runs ``git diff --name-only`` restricted to paths matching
    ``**gradle.lockfile``.

    Returns:
        Tuple of (has_diffs, list_of_changed_files). ``has_diffs`` is True
        exactly when the list is non-empty. If git itself fails, a warning is
        written to stderr and ``(False, [])`` is returned.
    """
    # NOTE(review): `git diff` lists tracked files only, so a lockfile newly
    # created by the regeneration step would presumably not show up here.
    # Confirm whether that case needs handling.
    result = run_command(
        # "--" marks everything after it as a pathspec, never a revision.
        ["git", "diff", "--name-only", "--", "**gradle.lockfile"],
        check=False,
    )

    if result.returncode != 0:
        # Keep the best-effort result but make the failure visible instead of
        # silently reporting "no differences".
        print(
            f"WARNING: could not diff lockfiles (git exited with "
            f"{result.returncode}):\n{result.stderr}",
            file=sys.stderr,
        )
        return False, []

    changed_lockfiles = [
        name
        for name in result.stdout.splitlines()
        if name.endswith("gradle.lockfile")
    ]
    return bool(changed_lockfiles), changed_lockfiles
96+
97+
98+
def restore_lockfiles() -> None:
    """Restore lockfiles to their original state.

    Runs ``git checkout`` on every path matching ``**gradle.lockfile``.
    Failures are reported on stderr but never raised, so cleanup cannot mask
    the result of the check itself.
    """
    print("Restoring lockfiles to original state...")
    try:
        result = run_command(
            # "--" marks everything after it as a pathspec, never a branch.
            ["git", "checkout", "--", "**gradle.lockfile"],
            check=False,
        )
    except (OSError, subprocess.SubprocessError) as exc:
        # Best effort: warn rather than crash, but do not hide the problem.
        print(f"WARNING: could not restore lockfiles: {exc}", file=sys.stderr)
        return

    if result.returncode != 0:
        print(
            f"WARNING: restoring lockfiles failed (git exited with "
            f"{result.returncode}):\n{result.stderr}",
            file=sys.stderr,
        )
105+
106+
107+
def main() -> int:
    """Main validation function.

    Skips the check outside a git repository or when no Gradle-related file
    has changed. Otherwise regenerates the lockfiles, compares them with the
    committed versions, and restores the working tree.

    Returns:
        0 when the check is skipped or all lockfiles are up-to-date; 1 when
        regeneration fails or at least one lockfile is out of date.
    """
    print("Checking gradle lockfile updates...")

    # Check if we're in a git repository
    result = run_command(["git", "rev-parse", "--git-dir"], check=False)
    if result.returncode != 0:
        print("Not in a git repository. Skipping lockfile check.")
        return 0

    # Check if any gradle-related files changed
    if not check_for_changes_in_gradle_files():
        print("✓ No gradle files changed. Skipping lockfile verification.")
        return 0

    try:
        # Regenerate lockfiles
        success, error = regenerate_lockfiles()
        if not success:
            print(f"\n❌ ERROR: {error}")
            return 1

        # Check for differences
        has_diffs, changed_files = check_for_lockfile_diffs()
    finally:
        # Always restore lockfiles to original state, including when
        # regeneration fails part-way or the diff step raises, so that no
        # rewritten lockfiles are left in the working tree.
        restore_lockfiles()

    if has_diffs:
        print("\n❌ ERROR: Dependency lockfiles are out of date!\n")
        print("The following lockfiles need to be updated:\n")
        for path in changed_files:
            print(f"  • {path}")

        print("\nYour build.gradle changes affect dependency resolution, but the")
        print("corresponding lockfiles were not updated.\n")
        print("To fix this, run:")
        print("  ./gradlew resolveAndLockAll --write-locks\n")
        print("Then commit the updated gradle.lockfile files along with your changes.")

        return 1

    print("✓ All gradle lockfiles are up-to-date.")
    return 0
150+
151+
152+
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    raise SystemExit(main())

.github/scripts/generate_pre_commit.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ def _generate_lint_fix_hook(self, project: Project) -> dict:
193193
return {
194194
"id": f"{project.project_id}-lint-fix",
195195
"name": f"{project.path} Lint Fix",
196-
"entry": f"./gradlew {project.gradle_path}:lintFix",
196+
"entry": f"./gradlew {project.gradle_path}:lintFix -x generateGitPropertiesGlobal",
197197
"language": "system",
198198
"files": f"^{project.path}/.*\\.(py|toml)$",
199199
"pass_filenames": False,
@@ -204,7 +204,7 @@ def _generate_spotless_hook(self, project: Project) -> dict:
204204
return {
205205
"id": f"{project.project_id}-spotless",
206206
"name": f"{project.path} Spotless Apply",
207-
"entry": f"./gradlew {project.gradle_path}:spotlessApply",
207+
"entry": f"./gradlew {project.gradle_path}:spotlessApply -x generateGitPropertiesGlobal",
208208
"language": "system",
209209
"files": f"^{project.path}/.*\\.java$",
210210
"pass_filenames": False,
@@ -215,7 +215,7 @@ def _generate_prettier_hook(self, project: Project) -> dict:
215215
return {
216216
"id": f"{project.project_id}-{project.taskName}",
217217
"name": f"{project.taskName}",
218-
"entry": f"./gradlew {project.gradle_path}:{project.taskName}",
218+
"entry": f"./gradlew {project.gradle_path}:{project.taskName} -x generateGitPropertiesGlobal",
219219
"language": "system",
220220
"files": project.filePattern,
221221
"pass_filenames": False,

.github/scripts/pre-commit-override.yaml

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,19 @@ repos:
33
hooks:
44
- id: smoke-test-cypress-lint-fix
55
name: smoke-test cypress Lint Fix
6-
entry: ./gradlew :smoke-test:cypressLintFix
6+
entry: ./gradlew :smoke-test:cypressLintFix -x generateGitPropertiesGlobal
77
language: system
88
files: ^smoke-test/tests/cypress/.*\.tsx$
99
pass_filenames: false
1010
- id: update-lineage-file
1111
name: update-lineage-file
12-
entry: ./gradlew :metadata-ingestion:lineageGen
12+
entry: ./gradlew :metadata-ingestion:lineageGen -x generateGitPropertiesGlobal
1313
language: system
1414
files: ^(metadata-ingestion-modules/.*|metadata-models/.*)$
1515
pass_filenames: false
16+
- id: check-gradle-lockfiles
17+
name: Check gradle lockfiles are updated
18+
entry: python .github/scripts/check_gradle_lockfiles.py
19+
language: system
20+
files: ^.*build\.gradle(\.kts)?$
21+
pass_filenames: false

.github/workflows/code-checks.yml

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,23 @@ on:
77
- "metadata-io/**"
88
- "datahub-web-react/**"
99
- "metadata-service/war/src/main/resources/boot/policies.json"
10+
- "**/build.gradle"
11+
- "**/build.gradle.kts"
12+
- "**/gradle.lockfile"
1013
- ".github/workflows/code-checks.yml"
14+
- ".github/scripts/check_*.py"
1115
pull_request:
1216
branches:
1317
- "**"
1418
paths:
1519
- "metadata-io/**"
1620
- "datahub-web-react/**"
1721
- "metadata-service/war/src/main/resources/boot/policies.json"
22+
- "**/build.gradle"
23+
- "**/build.gradle.kts"
24+
- "**/gradle.lockfile"
1825
- ".github/workflows/code-checks.yml"
26+
- ".github/scripts/check_*.py"
1927

2028
concurrency:
2129
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
@@ -26,7 +34,12 @@ jobs:
2634
strategy:
2735
fail-fast: false
2836
matrix:
29-
command: ["check_event_type.py", "check_policies.py"]
37+
command:
38+
[
39+
"check_event_type.py",
40+
"check_policies.py",
41+
"check_gradle_lockfiles.py",
42+
]
3043
name: run code checks
3144
runs-on: ubuntu-latest
3245
steps:

.github/workflows/docker-unified-nightly.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -323,6 +323,7 @@ jobs:
323323
TEST_STRATEGY: ${{ matrix.test_strategy }}
324324
BATCH_COUNT: "1" # since this workflow runs only on schedule trigger, batching isn't really needed.
325325
BATCH_NUMBER: "0"
326+
PROFILE_NAME: ${{ matrix.profile }}
326327
run: |
327328
echo "$DATAHUB_VERSION"
328329
./gradlew --stop

.github/workflows/docker-unified.yml

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ on:
1212
- master
1313
- releases/**
1414
pull_request:
15+
types: [opened, synchronize, reopened, labeled]
1516
branches:
1617
- "**"
1718
release:
@@ -21,15 +22,19 @@ concurrency:
2122
# Using `github.run_id` (unique val) instead of `github.ref` here
2223
# because we don't want to cancel this workflow on master only for PRs
2324
# as that makes reproducing issues easier
24-
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.run_id }}
25+
# Adding github.event.action == labeled as a means to differentiate the trigger due to adding a label -- most labels are
26+
# no-ops except for `depot`
27+
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.run_id }}-${{ github.event.action == 'labeled' }}
2528
cancel-in-progress: true
2629

2730
env:
2831
DOCKER_REGISTRY: "acryldata"
2932
PROFILE_NAME: "${{ github.event.inputs.profileName || 'quickstart-consumers' }}"
3033

3134
DOCKER_CACHE: "DEPOT"
32-
DEPOT_PROJECT_ID: "${{ vars.DEPOT_PROJECT_ID }}"
35+
DEPOT_PROJECT_ID: "s0gr1cr3jd"
36+
HAS_DEPOT_LABEL: ${{ github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, 'depot') }}
37+
IS_FORK: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name != github.repository }}
3338
DEPOT_TOKEN: "${{ secrets.DEPOT_TOKEN }}"
3439

3540
permissions:
@@ -39,6 +44,7 @@ permissions:
3944
jobs:
4045
setup:
4146
runs-on: depot-ubuntu-24.04-small
47+
if: ${{ github.event_name != 'pull_request' || github.event.action != 'labeled' || github.event.label.name == 'depot' }}
4248
outputs:
4349
# TODO: Many of the vars below should not be required anymore.
4450
tag: ${{ steps.tag.outputs.tag }}
@@ -135,22 +141,25 @@ jobs:
135141
- name: Determine runner type
136142
id: set-runner
137143
# This needs to handle two scenarios:
138-
# 1. Running on a PR from a fork. There are some auth issues that prevent us from using depot in that case.
139-
# So, Its easier to just use the regular github actions cache and build all images for each parallel job running smoke test.
140-
# Note, concurrency is lower when using github runners, queue times can be longer, test time is longer due to fewer parallel jobs.
141-
# 2. Running on a PR from a branch in the datahub-project org and push/schedule events on master.
144+
# 1. Running on a PR from a fork. We use github runners, unless the "depot" label exists -- in which case, we run
145+
# it on depot. Note: concurrency is lower when using github runners, queue times can be longer, test time is longer
146+
# due to fewer parallel jobs.
147+
# 2. Running on a PR from a branch in the datahub-project org and push/schedule events on master.
142148
# Depot is used here for remote container builds in base_build and also for all runners. Depot runners support unlimited concurrency
143149
# and hence short queue times and higher parallelism of smoke tests
144-
145150
run: |
146-
if [[ "${{ env.DOCKER_CACHE }}" == "DEPOT" && "${{ env.DEPOT_PROJECT_ID }}" != "" ]]; then
151+
if [[ "${{ env.DOCKER_CACHE }}" == "DEPOT" && "${{ env.IS_FORK }}" == "false" ]]; then
147152
echo "build_runner_type=depot-ubuntu-24.04-4" >> "$GITHUB_OUTPUT"
148153
echo "test_runner_type=depot-ubuntu-24.04-4" >> "$GITHUB_OUTPUT"
149154
echo "test_runner_type_small=depot-ubuntu-24.04-small" >> "$GITHUB_OUTPUT"
150155
echo "use_depot_cache=true" >> "$GITHUB_OUTPUT"
151156
else
152157
echo "build_runner_type=ubuntu-latest" >> "$GITHUB_OUTPUT"
153-
echo "test_runner_type=ubuntu-latest" >> "$GITHUB_OUTPUT"
158+
if [[ "${{ env.HAS_DEPOT_LABEL }}" == "true" ]]; then
159+
echo "test_runner_type=depot-ubuntu-24.04-4" >> "$GITHUB_OUTPUT"
160+
else
161+
echo "test_runner_type=ubuntu-latest" >> "$GITHUB_OUTPUT"
162+
fi
154163
echo "test_runner_type_small=ubuntu-latest" >> "$GITHUB_OUTPUT"
155164
echo "use_depot_cache=false" >> "$GITHUB_OUTPUT"
156165
# publishing is currently only supported via depot
@@ -392,7 +401,7 @@ jobs:
392401
# python_batch_count is used to split pytests in the smoke-test (batches of actual test functions)
393402
# cypress_batch_count is used to split the collection of cypress test specs into batches.
394403
run: |
395-
if [[ "${{ needs.setup.outputs.test_runner_type }}" == "ubuntu-latest" ]]; then
404+
if [[ "${{ env.IS_FORK }}" == "true" ]]; then
396405
echo "cypress_batch_count=5" >> "$GITHUB_OUTPUT"
397406
echo "python_batch_count=3" >> "$GITHUB_OUTPUT"
398407
else
@@ -663,7 +672,7 @@ jobs:
663672

664673
- name: Download build Metadata
665674
if: ${{ needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }}
666-
uses: actions/download-artifact@v5
675+
uses: actions/download-artifact@v6
667676
with:
668677
name: build-metadata-${{ needs.setup.outputs.tag }}
669678
path: ${{ github.workspace }}/build

.github/workflows/update-test-weights.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,12 @@ jobs:
2626

2727
steps:
2828
- name: Checkout repository
29-
uses: actions/checkout@v4
29+
uses: actions/checkout@v5
3030
with:
3131
fetch-depth: 0
3232

3333
- name: Set up Python
34-
uses: actions/setup-python@v5
34+
uses: actions/setup-python@v6
3535
with:
3636
python-version: "3.10"
3737

.github/workflows/verify-quickstart-compose.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ jobs:
1616
THRESHOLD_GB: 4.3
1717
steps:
1818
- name: Checkout repository
19-
uses: actions/checkout@v4
19+
uses: actions/checkout@v5
2020

2121
- name: Generate quickstart compose file
2222
run: |

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ MANIFEST
3535
.python-version
3636
Pipfile
3737
Pipfile.lock
38+
fix-failing-tests/
3839

3940
# Generated files
4041
**/bin

0 commit comments

Comments
 (0)