name: Nightly Throughput Stress

on:
  # Manual runs can override the scenario duration and both timeouts.
  workflow_dispatch:
    inputs:
      duration:
        type: string
        description: "Test duration (e.g., 6h, 1h)"
        default: "5h"
        required: false
      timeout:
        type: string
        description: "Scenario timeout (should always be 30m more than duration)"
        default: "5h30m"
        required: false
      job_timeout_minutes:
        type: number
        description: "GitHub Actions job timeout in minutes"
        default: 360
        required: false
  push:
    branches: [nightly_tps]
  schedule:
    # Daily at 11:00 UTC (3 AM PST), offset from the existing nightly run.
    - cron: "00 11 * * *"

env:
  # Identifier handed to the scenario via --run-id.
  RUN_ID: ${{ github.run_id }}-throughput-stress

  # Which OMES repository and ref to check out.
  OMES_REPO: temporalio/omes
  OMES_REF: main

  # Where the server and scenario logs are written; this directory is
  # uploaded as an artifact when the job fails or is cancelled.
  WORKER_LOG_DIR: /tmp/throughput-stress-logs

  # Scenario duration and timeout. Resolution order: workflow_dispatch input,
  # then the repository variable, then the hard-coded fallback.
  TEST_DURATION: ${{ inputs.duration || vars.NIGHTLY_TEST_DURATION || '5h' }}
  TEST_TIMEOUT: ${{ inputs.timeout || vars.NIGHTLY_TEST_TIMEOUT || '5h30m' }}

jobs:
  # Builds the SDK from this checkout, starts a local Temporal dev server, and
  # runs the OMES throughput_stress scenario against it with a TypeScript worker.
  throughput-stress:
    runs-on: ubuntu-latest-4-cores
    # Resolution order: workflow_dispatch input, repository variable, 360.
    timeout-minutes: ${{ fromJSON(inputs.job_timeout_minutes || vars.NIGHTLY_JOB_TIMEOUT_MINUTES || 360) }}

    steps:
      - name: Print test configuration
        run: |
          echo "=== Throughput Stress Test Configuration ==="
          echo "Duration: $TEST_DURATION"
          echo "Timeout: $TEST_TIMEOUT"
          echo "Run ID: $RUN_ID"
          echo "=========================================="

      - name: Checkout SDK
        uses: actions/checkout@v4
        with:
          submodules: recursive

      # OMES is checked out into ./omes, next to the SDK sources.
      - name: Checkout OMES
        uses: actions/checkout@v4
        with:
          repository: ${{ env.OMES_REPO }}
          ref: ${{ env.OMES_REF }}
          path: omes
          submodules: recursive

      # The Go version and module cache key both come from the OMES checkout.
      - name: Setup Go
        uses: actions/setup-go@v5
        with:
          go-version-file: omes/go.mod
          cache-dependency-path: omes/go.sum

      - name: Setup Node
        uses: actions/setup-node@v4
        with:
          node-version: 22

      - name: Get NPM cache directory
        id: npm-cache-dir
        # Quoted so the redirect target is never word-split.
        run: echo "dir=$(npm config get cache)" >> "$GITHUB_OUTPUT"

      - name: Restore NPM cache
        uses: actions/cache/restore@v4
        with:
          path: ${{ steps.npm-cache-dir.outputs.dir }}
          key: npm-main-linux-x64-${{ hashFiles('./package-lock.json') }}
          restore-keys: |
            npm-main-linux-x64-

      - name: Install protoc
        uses: arduino/setup-protoc@v3
        with:
          version: '23.x'
          repo-token: ${{ secrets.GITHUB_TOKEN }}

      - name: Setup Rust
        uses: dtolnay/rust-toolchain@stable

      - name: Rust cache
        uses: Swatinem/rust-cache@v2
        with:
          workspaces: packages/core-bridge -> target

      # Up to three attempts in total; the step fails only if all three fail.
      - name: Install SDK dependencies
        run: |
          npm ci --ignore-scripts --verbose || \
          npm ci --ignore-scripts --verbose || \
          npm ci --ignore-scripts --verbose

      - name: Build SDK
        run: npm run build
        env:
          BUILD_CORE_RELEASE: true

      # Saved before the long-running scenario, and even when an earlier step
      # failed (if: always()).
      - name: Save NPM cache
        uses: actions/cache/save@v4
        if: always()
        with:
          path: ${{ steps.npm-cache-dir.outputs.dir }}
          key: npm-main-linux-x64-${{ hashFiles('./package-lock.json') }}

      - name: Install Temporal CLI
        uses: temporalio/setup-temporal@v0

      - name: Setup log directory
        run: mkdir -p "$WORKER_LOG_DIR"

      # The server is backgrounded; stdout and stderr both go to the log file.
      - name: Start Temporal Server
        run: |
          temporal server start-dev \
            --db-filename temporal-throughput-stress.sqlite \
            --sqlite-pragma journal_mode=WAL \
            --sqlite-pragma synchronous=OFF \
            --headless &> "$WORKER_LOG_DIR/temporal-server.log" &

      # Fail fast if the backgrounded server never comes up, instead of
      # discovering it only after the scenario has been built and launched.
      # NOTE(review): relies on `temporal operator cluster health` exiting
      # non-zero until the dev server is serving on its default address.
      - name: Wait for Temporal Server
        run: |
          for attempt in $(seq 1 30); do
            if temporal operator cluster health > /dev/null 2>&1; then
              echo "Temporal server is healthy (attempt $attempt)"
              exit 0
            fi
            sleep 2
          done
          echo "Temporal server did not become healthy in time" >&2
          tail -n 50 "$WORKER_LOG_DIR/temporal-server.log" >&2 || true
          exit 1

      - name: Run throughput stress scenario with local SDK
        working-directory: omes
        run: |
          # This makes the pipeline return the exit code of the first failing command
          # Otherwise the output of the `tee` command will be used
          # (which is troublesome when the scenario fails but the `tee` command succeeds)
          set -o pipefail

          # Use run-scenario-with-worker to build and run in one step
          # Pass the SDK directory as --version for local testing
          # Note: The hardcoded values below match OMES defaults, except:
          # - visibility-count-timeout: 5m (vs 3m default)
          #   to give CI a bit more time for visibility consistency
          # Expansions are quoted so values supplied via workflow_dispatch
          # inputs cannot be word-split or glob-expanded by the shell.
          go run ./cmd run-scenario-with-worker \
            --scenario throughput_stress \
            --language typescript \
            --version "$(pwd)/.." \
            --run-id "$RUN_ID" \
            --duration "$TEST_DURATION" \
            --timeout "$TEST_TIMEOUT" \
            --max-concurrent 10 \
            --option internal-iterations=10 \
            --option continue-as-new-after-iterations=3 \
            --option sleep-time=1s \
            --option visibility-count-timeout=5m \
            --option min-throughput-per-hour=1000 \
            2>&1 | tee "$WORKER_LOG_DIR/scenario.log"

      - name: Upload logs on failure
        if: failure() || cancelled()
        uses: actions/upload-artifact@v4
        with:
          name: throughput-stress-logs
          path: ${{ env.WORKER_LOG_DIR }}
          retention-days: 30

      # Runs when the job fails or is cancelled, whatever triggered the run.
      - name: Notify Slack on failure
        if: failure() || cancelled()
        uses: slackapi/slack-github-action@v2
        with:
          webhook-type: incoming-webhook
          payload: |
            {
              "text": "Nightly TypeScript throughput stress test failed",
              "blocks": [
                {
                  "type": "section",
                  "text": {
                    "type": "mrkdwn",
                    "text": "*Nightly Throughput Stress Failed* :x:\n\n*Duration:* ${{ env.TEST_DURATION }}\n*Run:* <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|View Logs>\n*Triggered by:* ${{ github.event_name == 'schedule' && 'Scheduled' || github.actor }}"
                  }
                }
              ]
            }
        env:
          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_SDK_ALERTS_WEBHOOK }}