refactor: improve intermediate result parsing in issue dedup workflow (#7218)

This commit is contained in:
Gaurav 2025-08-27 10:14:38 -07:00 committed by GitHub
parent 0c1f3acc7d
commit a33293ac60
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -44,7 +44,7 @@ jobs:
timeout-minutes: 20 timeout-minutes: 20
runs-on: 'ubuntu-latest' runs-on: 'ubuntu-latest'
outputs: outputs:
duplicate_issues_json: '${{ steps.gemini_issue_deduplication.outputs.summary }}' duplicate_issues_csv: '${{ env.DUPLICATE_ISSUES_CSV }}'
steps: steps:
- name: 'Checkout' - name: 'Checkout'
uses: 'actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8' # ratchet:actions/checkout@v5 uses: 'actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8' # ratchet:actions/checkout@v5
@ -135,11 +135,10 @@ jobs:
- If your final list is empty, you are done. - If your final list is empty, you are done.
- Print to the logs if you omitted any potential duplicates based on your analysis. - Print to the logs if you omitted any potential duplicates based on your analysis.
- If the `duplicates` tool returned 15+ results, use the top 15 matches (based on descending similarity score value) to perform this step. - If the `duplicates` tool returned 15+ results, use the top 15 matches (based on descending similarity score value) to perform this step.
3. **Output final duplicates list as JSON:** 3. **Output final duplicates list as CSV:**
- Output the final list of duplicate issue numbers in a JSON format. - Convert the list of appropriate duplicate issue numbers into a comma-separated list (CSV). If there are no appropriate duplicates, use the empty string.
- For example: `{"duplicate_issues": [123, 456]}` - Use the "echo" shell command to append the CSV of issue numbers into the filepath referenced by the environment variable "${GITHUB_ENV}":
- If no duplicates are found, output `{"duplicate_issues": []}`. echo "DUPLICATE_ISSUES_CSV=[DUPLICATE_ISSUES_AS_CSV]" >> "${GITHUB_ENV}"
- Do not include any explanation or additional text, just the JSON.
## Guidelines ## Guidelines
- Only use the `duplicates` and `run_shell_command` tools. - Only use the `duplicates` and `run_shell_command` tools.
- The `run_shell_command` tool can be used with `gh issue view`. - The `run_shell_command` tool can be used with `gh issue view`.
@ -153,14 +152,20 @@ jobs:
if: |- if: |-
github.repository == 'google-gemini/gemini-cli' && github.repository == 'google-gemini/gemini-cli' &&
vars.TRIAGE_DEDUPLICATE_ISSUES != '' && vars.TRIAGE_DEDUPLICATE_ISSUES != '' &&
needs.find-duplicates.outputs.duplicate_issues_json && needs.find-duplicates.outputs.duplicate_issues_csv != '' &&
(github.event_name == 'issues' || (
github.event_name == 'issues' ||
github.event_name == 'workflow_dispatch' || github.event_name == 'workflow_dispatch' ||
(github.event_name == 'issue_comment' && (
github.event_name == 'issue_comment' &&
contains(github.event.comment.body, '@gemini-cli /deduplicate') && contains(github.event.comment.body, '@gemini-cli /deduplicate') &&
(github.event.comment.author_association == 'OWNER' || (
github.event.comment.author_association == 'OWNER' ||
github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'MEMBER' ||
github.event.comment.author_association == 'COLLABORATOR'))) github.event.comment.author_association == 'COLLABORATOR'
)
)
)
permissions: permissions:
issues: 'write' issues: 'write'
timeout-minutes: 5 timeout-minutes: 5
@ -177,34 +182,20 @@ jobs:
- name: 'Comment and Label Duplicate Issue' - name: 'Comment and Label Duplicate Issue'
uses: 'actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea' uses: 'actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea'
env: env:
DUPLICATES_OUTPUT: '${{ needs.find-duplicates.outputs.duplicate_issues_json }}' DUPLICATES_OUTPUT: '${{ needs.find-duplicates.outputs.duplicate_issues_csv }}'
with: with:
github-token: '${{ steps.generate_token.outputs.token || secrets.GITHUB_TOKEN }}' github-token: '${{ steps.generate_token.outputs.token || secrets.GITHUB_TOKEN }}'
script: |- script: |-
const rawJson = process.env.DUPLICATES_OUTPUT; const rawCsv = process.env.DUPLICATES_OUTPUT;
core.info(`Raw duplicates JSON: ${rawJson}`); core.info(`Raw duplicates CSV: ${rawCsv}`);
let parsedJson; const duplicateIssues = rawCsv.split(',').map(s => s.trim()).filter(s => s);
try {
const jsonStringMatch = rawJson.match(/{[\s\S]*}/);
if (!jsonStringMatch) {
core.setFailed(`Could not find JSON object in the output.\nRaw output: ${rawJson}`);
return;
}
const jsonString = jsonStringMatch[0];
parsedJson = JSON.parse(jsonString);
core.info(`Parsed duplicates JSON: ${JSON.stringify(parsedJson)}`);
} catch (err) {
core.setFailed(`Failed to parse duplicates JSON from Gemini output: ${err.message}\nRaw output: ${rawJson}`);
return;
}
if (!parsedJson.duplicate_issues || parsedJson.duplicate_issues.length === 0) { if (duplicateIssues.length === 0) {
core.info('No duplicate issues found. Nothing to do.'); core.info('No duplicate issues found. Nothing to do.');
return; return;
} }
const issueNumber = ${{ github.event.issue.number }}; const issueNumber = ${{ github.event.issue.number }};
const duplicateIssues = parsedJson.duplicate_issues;
function formatCommentBody(issues, updated = false) { function formatCommentBody(issues, updated = false) {
const header = updated const header = updated