Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
248 changes: 77 additions & 171 deletions setup/js/assign_agent_helpers.cjs

Large diffs are not rendered by default.

73 changes: 54 additions & 19 deletions setup/js/assign_to_agent.cjs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// @ts-check
/// <reference types="@actions/github-script" />

const { AGENT_LOGIN_NAMES, getAvailableAgentLogins, findAgent, getIssueDetails, getPullRequestDetails, assignAgentToIssue, generatePermissionErrorSummary } = require("./assign_agent_helpers.cjs");
const { AGENT_LOGIN_NAMES, getAgentLogins, getAvailableAgentLogins, findAgent, getIssueDetails, getPullRequestDetails, assignAgentToIssue, generatePermissionErrorSummary } = require("./assign_agent_helpers.cjs");
const { getErrorMessage } = require("./error_helpers.cjs");
const { resolveTarget, isStagedMode } = require("./safe_output_helpers.cjs");
const { generateStagedPreview } = require("./staged_preview.cjs");
Expand Down Expand Up @@ -327,15 +327,15 @@ async function main(config = {}) {

try {
// Find agent (use cache to avoid repeated lookups)
let agentId = agentCache[agentName];
if (!agentId) {
let agentLogin = agentCache[agentName];
if (!agentLogin) {
core.info(`Looking for ${agentName} coding agent...`);
agentId = await findAgent(effectiveOwner, effectiveRepo, agentName, issueNumber || pullNumber, githubClient);
if (!agentId) {
agentLogin = await findAgent(effectiveOwner, effectiveRepo, agentName, issueNumber || pullNumber, githubClient);
if (!agentLogin) {
throw new Error(`${agentName} coding agent is not available for this repository`);
}
agentCache[agentName] = agentId;
core.info(`Found ${agentName} coding agent (ID: ${agentId})`);
agentCache[agentName] = agentLogin;
core.info(`Found ${agentName} coding agent (login: ${agentLogin})`);
}

// Get issue or PR details
Expand Down Expand Up @@ -371,7 +371,8 @@ async function main(config = {}) {
// Skip if agent is already assigned and no explicit per-item pull_request_repo is specified.
// When a different pull_request_repo is provided on the message, allow re-assignment
// so Copilot can be triggered for a different target repository on the same issue.
if (currentAssignees.some(a => a.id === agentId) && !shouldAllowReassignment) {
const knownLogins = getAgentLogins(agentName);
if (currentAssignees.some(a => a.login === agentLogin || knownLogins.includes(a.login)) && !shouldAllowReassignment) {
core.info(`${agentName} is already assigned to ${type} #${number}`);
_allResults.push({ issue_number: issueNumber, pull_number: pullNumber, agent: agentName, owner: effectiveOwner, repo: effectiveRepo, pull_request_repo: effectivePullRequestRepoSlug, success: true });
return { success: true };
Expand All @@ -383,7 +384,7 @@ async function main(config = {}) {
if (customInstructions) core.info(`Using custom instructions: ${customInstructions.substring(0, 100)}${customInstructions.length > 100 ? "..." : ""}`);
if (effectiveBaseBranch) core.info(`Using base branch: ${effectiveBaseBranch}`);

const success = await assignAgentToIssue(assignableId, agentId, currentAssignees, agentName, allowedAgents, model, customAgent, customInstructions, effectiveBaseBranch, githubClient, taskContext, effectivePullRequestRepoSlug);
const success = await assignAgentToIssue(assignableId, agentLogin, currentAssignees, agentName, allowedAgents, model, customAgent, customInstructions, effectiveBaseBranch, githubClient, taskContext, effectivePullRequestRepoSlug);
if (!success) throw new Error(`Failed to assign ${agentName} via REST`);

core.info(`Successfully assigned ${agentName} coding agent to ${type} #${number}`);
Expand All @@ -392,14 +393,42 @@ async function main(config = {}) {
} catch (error) {
let errorMessage = getErrorMessage(error);

// When the agent specified an issue_number that turns out to be a PR, skip
// silently without posting a comment — error comments on PRs are confusing.
if (/** @type {any} */ error.isPullRequest) {
core.warning(`Skipping assign_to_agent for #${number}: target is a pull request, not an issue.`);
_allResults.push({
issue_number: issueNumber,
pull_number: pullNumber,
agent: agentName,
owner: effectiveOwner,
repo: effectiveRepo,
pull_request_repo: effectivePullRequestRepoSlug,
success: false,
skipped: true,
error: errorMessage,
});
return { success: false, skipped: true, error: errorMessage };
}

const isAuthError = ["Bad credentials", "Not Authenticated", "Resource not accessible", "Insufficient permissions", "requires authentication"].some(msg => errorMessage.includes(msg));
const isAvailabilityError = errorMessage.includes("coding agent is not available for this repository");

if (ignoreIfError && (isAuthError || isAvailabilityError)) {
const errorType = isAuthError ? "authentication/permission" : "agent availability";
core.warning(`Agent assignment failed for ${agentName} on ${type} #${number} due to ${errorType} error. Skipping due to ignore-if-error=true.`);
core.info(`Error details: ${errorMessage}`);
_allResults.push({ issue_number: issueNumber, pull_number: pullNumber, agent: agentName, owner: effectiveOwner, repo: effectiveRepo, pull_request_repo: effectivePullRequestRepoSlug, success: true, skipped: true });
_allResults.push({
issue_number: issueNumber,
pull_number: pullNumber,
agent: agentName,
owner: effectiveOwner,
repo: effectiveRepo,
pull_request_repo: effectivePullRequestRepoSlug,
success: true,
skipped: true,
error: errorMessage,
});
return { success: true, skipped: true };
}

Expand Down Expand Up @@ -451,18 +480,24 @@ function getAssignToAgentAssigned() {

/**
 * Returns the "assignment_errors" output string for step outputs.
 *
 * Format: one "issue:N:agent:error" or "pr:N:agent:error" entry per reported
 * result, newline-separated. An entry is reported when it captured an error and
 * is either a hard failure (success is falsy) or a skipped assignment
 * (skipped is truthy, e.g. ignore-if-error). Results without an error are
 * never reported. Returns an empty string when nothing qualifies.
 * @returns {string}
 */
function getAssignToAgentErrors() {
  return (
    _allResults
      // Report hard failures (!success) as well as skipped entries that still
      // captured an error, so downstream failure handling can surface
      // assignment problems in issue/comment reports.
      .filter(r => r.error && (r.skipped || !r.success))
      .map(r => {
        // Issue number takes precedence; fall back to the pull request number.
        const number = r.issue_number || r.pull_number;
        const prefix = r.issue_number ? "issue" : "pr";
        return `${prefix}:${number}:${r.agent}:${r.error}`;
      })
      .join("\n")
  );
}

/**
Expand Down
3 changes: 2 additions & 1 deletion setup/js/claude_harness.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -410,10 +410,11 @@ async function main() {
}

const nonRetryableGuard = detectNonRetryableHarnessGuard(result.output);
if (nonRetryableGuard.aiCreditsExceeded || nonRetryableGuard.awfAPIProxyBlockingRequests) {
if (nonRetryableGuard.aiCreditsExceeded || nonRetryableGuard.awfAPIProxyBlockingRequests || nonRetryableGuard.maxRunsExceeded) {
const reasons = [];
if (nonRetryableGuard.aiCreditsExceeded) reasons.push("AI credits budget exceeded");
if (nonRetryableGuard.awfAPIProxyBlockingRequests) reasons.push("AWF API proxy is blocking requests");
if (nonRetryableGuard.maxRunsExceeded) reasons.push("maximum LLM invocations exceeded");
log(`attempt ${attempt + 1}: ${reasons.join(" and ")} — not retrying (non-retryable guard condition)`);
break;
}
Expand Down
48 changes: 43 additions & 5 deletions setup/js/codex_harness.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,23 @@ const BACKOFF_MULTIPLIER = 2;
// Maximum delay cap in milliseconds
const MAX_DELAY_MS = 60000;

// Pattern to detect OpenAI rate-limit errors (case-insensitive). Matches any of:
//   - the JSON error type field ("rate_limit_exceeded"),
//   - the HTTP status line ("429 Too Many Requests"),
//   - the client-side exception class ("RateLimitError"),
//   - the human-readable message Codex emits inside "Reconnecting..." / error lines:
//       "Rate limit reached for <model> in organization <org> on tokens per min (TPM): ..."
//     (the " in organization <org>" segment is optional).
const RATE_LIMIT_ERROR_PATTERN = /rate_limit_exceeded|429 Too Many Requests|RateLimitError|Rate limit reached for \S+(?: in organization \S+)? on tokens per min/i;

// Pattern to detect when Codex's internal stream-reconnect budget is fully spent.
// Codex emits "Reconnecting... N/N (reason)" where both numbers are the same when
// the reconnect is the last allowed attempt. Seeing this pattern together with a
// rate-limit error means the session cannot make forward progress: every reconnect
// attempt immediately fails with the same rate-limit, and a fresh harness run will
// re-encounter the same limit since the same work pattern consumes the same TPM budget.
//
// The backreference \1 requires the two numeric parts of "N/N" to be identical —
// "5/5" matches (exhausted) but "1/5", "3/5", "4/5" do not (still retrying).
// The trailing \b stops a prefix match such as "5/55" from being accepted.
const RECONNECT_EXHAUSTED_PATTERN = /Reconnecting\.\.\.\s+(\d+)\/\1\b/;

// Matches "Authentication failed" (case-insensitive), optionally followed by a
// "(Request ID: ...)" suffix.
const AUTHENTICATION_FAILED_PATTERN = /Authentication failed(?:\s*\(Request ID:[^)]+\))?/i;

// Pattern to detect a missing API key at startup — Codex emits this before making any API
Expand Down Expand Up @@ -130,6 +143,20 @@ function isInvalidModelError(output) {
return INVALID_MODEL_ERROR_PATTERN.test(output);
}

/**
 * Reports whether the collected output contains a "Reconnecting... N/N" marker
 * whose two numbers are equal, i.e. Codex is on its last internal
 * stream-reconnect attempt.
 *
 * Combined with a rate-limit error this signals that restarting from scratch
 * would run straight into the same rate limit and burn more of the token budget.
 * @param {string} output - Collected stdout+stderr from the process
 * @returns {boolean} true when the reconnect-exhausted marker is present
 */
function isReconnectExhaustedError(output) {
  const reconnectBudgetSpent = RECONNECT_EXHAUSTED_PATTERN.test(output);
  return reconnectBudgetSpent;
}

/**
* Resolve --prompt-file arguments for the Codex run.
* Strips the --prompt-file <path> pair from args and appends the file content
Expand Down Expand Up @@ -439,11 +466,12 @@ async function main() {
}

const nonRetryableGuard = detectNonRetryableHarnessGuard(result.output);
if (nonRetryableGuard.aiCreditsExceeded || nonRetryableGuard.awfAPIProxyBlockingRequests || nonRetryableGuard.goalAlreadyActive) {
if (nonRetryableGuard.aiCreditsExceeded || nonRetryableGuard.awfAPIProxyBlockingRequests || nonRetryableGuard.goalAlreadyActive || nonRetryableGuard.maxRunsExceeded) {
const reasons = [];
if (nonRetryableGuard.aiCreditsExceeded) reasons.push("AI credits budget exceeded");
if (nonRetryableGuard.awfAPIProxyBlockingRequests) reasons.push("AWF API proxy is blocking requests");
if (nonRetryableGuard.goalAlreadyActive) reasons.push("goal is already active for this thread (use update_goal when the current goal is complete)");
if (nonRetryableGuard.maxRunsExceeded) reasons.push("maximum LLM invocations exceeded");
log(`attempt ${attempt + 1}: ${reasons.join(" and ")} — not retrying (non-retryable guard condition)`);
break;
}
Expand All @@ -470,6 +498,15 @@ async function main() {
break;
}

// Codex's internal stream-reconnect retries are exhausted and the root cause is a
// rate-limit error. Each reconnect attempt immediately failed with the same limit,
// so a fresh harness run will encounter the same rate-limit at the same point in the
// session and drain the token budget further without making progress.
if (isRateLimit && isReconnectExhaustedError(result.output)) {
log(`attempt ${attempt + 1}: rate-limit with exhausted reconnects — not retrying (fresh run would hit the same rate limit)`);
break;
}

// Retry when the session was partially executed (has output) or on well-known
// transient errors (rate limit, server error) even without output.
const isTransient = isRateLimit || isServer;
Expand Down Expand Up @@ -504,6 +541,7 @@ if (typeof module !== "undefined" && module.exports) {
isMissingApiKeyError,
isServerError,
isInvalidModelError,
isReconnectExhaustedError,
countPermissionDeniedIssues,
hasNumerousPermissionDeniedIssues,
extractDeniedCommands,
Expand Down
Loading
Loading