Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 93 additions & 16 deletions agents/src/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -351,43 +351,120 @@ export class AsyncIterableQueue<T> implements AsyncIterableIterator<T> {
}

/** @internal */
// Ref: python livekit-agents/livekit/agents/utils/exp_filter.py - 5-64 lines
export class ExpFilter {
#alpha: number;
#max?: number;
#filtered?: number = undefined;
#maxValue?: number;
#minValue?: number;
#value?: number;

constructor(
alpha: number,
options:
| number
| {
initial?: number;
maxValue?: number;
minValue?: number;
} = {},
) {
if (!(alpha > 0 && alpha <= 1)) {
throw new Error('alpha must be in (0, 1].');
}

constructor(alpha: number, max?: number) {
this.#alpha = alpha;
this.#max = max;

if (typeof options === 'number') {
this.#maxValue = options;
return;
}

this.#value = options.initial;
this.#maxValue = options.maxValue;
this.#minValue = options.minValue;
}

reset(alpha?: number) {
if (alpha) {
reset(
options: {
alpha?: number;
initial?: number;
maxValue?: number;
minValue?: number;
} = {},
): void {
const { alpha, initial, maxValue, minValue } = options;

if (alpha !== undefined) {
if (!(alpha > 0 && alpha <= 1)) {
throw new Error('alpha must be in (0, 1].');
}
this.#alpha = alpha;
}
this.#filtered = undefined;

if (initial !== undefined) {
this.#value = initial;
}

if (minValue !== undefined) {
this.#minValue = minValue;
}

if (maxValue !== undefined) {
this.#maxValue = maxValue;
}
}

apply(exp: number, sample: number): number {
if (this.#filtered) {
apply(exp: number, sample = this.#value): number {
if (sample === undefined && this.#value === undefined) {
throw new Error('sample or initial value must be given.');
}

if (sample !== undefined && this.#value === undefined) {
this.#value = sample;
} else if (sample !== undefined && this.#value !== undefined) {
const a = this.#alpha ** exp;
this.#filtered = a * this.#filtered + (1 - a) * sample;
} else {
this.#filtered = sample;
this.#value = a * this.#value + (1 - a) * sample;
}

if (this.#value === undefined) {
throw new Error('sample or initial value must be given.');
}

if (this.#maxValue !== undefined && this.#value > this.#maxValue) {
this.#value = this.#maxValue;
}

if (this.#max && this.#filtered > this.#max) {
this.#filtered = this.#max;
if (this.#minValue !== undefined && this.#value < this.#minValue) {
this.#value = this.#minValue;
}

return this.#filtered;
return this.#value;
}

get value(): number | undefined {
return this.#value;
}

get filtered(): number | undefined {
return this.#filtered;
return this.#value;
}

get alpha(): number {
return this.#alpha;
}

get minValue(): number | undefined {
return this.#minValue;
}

get maxValue(): number | undefined {
return this.#maxValue;
}

set alpha(alpha: number) {
if (!(alpha > 0 && alpha <= 1)) {
throw new Error('alpha must be in (0, 1].');
}
this.#alpha = alpha;
}
}
Expand Down
55 changes: 32 additions & 23 deletions agents/src/voice/agent_activity.ts
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ import {
type RecognitionHooks,
type STTPipeline,
} from './audio_recognition.js';
import { createEndpointing } from './endpointing.js';
import {
AgentSessionEventTypes,
createErrorEvent,
Expand All @@ -88,6 +89,7 @@ import {
} from './generation.js';
import type { TimedString } from './io.js';
import { SpeechHandle } from './speech_handle.js';
import { stripUndefined } from './turn_config/utils.js';
import { setParticipantSpanAttributes } from './utils.js';

export const agentActivityStorage = new AsyncLocalStorage<AgentActivity>();
Expand Down Expand Up @@ -188,6 +190,7 @@ export class AgentActivity implements RecognitionHooks {
private isInterruptionDetectionEnabled: boolean;
private isInterruptionByAudioActivityEnabled: boolean;
private isDefaultInterruptionByAudioActivityEnabled: boolean;
private interruptionDetected = false;
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟡 Missing // Ref: Python reference comment on interruptionDetected state additions

Per CLAUDE.md rules, every JS change that corresponds to a Python change must carry an inline // Ref: python <relative-file-path> - <line-range> lines comment directly above the relevant line(s). The new interruptionDetected field and its assignment in onInterruptionOverlappingSpeech are clearly ported from the Python agent and lack the required reference comment.

Open in Devin Review

Was this helpful? React with 👍 or 👎 to provide feedback.


private readonly onRealtimeGenerationCreated = (ev: GenerationCreatedEvent): void =>
this.onGenerationCreated(ev);
Expand All @@ -206,6 +209,7 @@ export class AgentActivity implements RecognitionHooks {
this.onError(ev);

private readonly onInterruptionOverlappingSpeech = (ev: OverlappingSpeechEvent): void => {
this.interruptionDetected = ev.isInterruption;
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟡 Missing // Ref: Python reference comment on interruptionDetected assignment in onInterruptionOverlappingSpeech

Per CLAUDE.md rules, every JS change that corresponds to a Python change must carry an inline // Ref: comment. The new line this.interruptionDetected = ev.isInterruption; inside the onInterruptionOverlappingSpeech handler is ported from Python and lacks the required reference comment.

Open in Devin Review

Was this helpful? React with 👍 or 👎 to provide feedback.

Comment thread
u9g marked this conversation as resolved.
this.agentSession.emit(AgentSessionEventTypes.OverlappingSpeech, ev);
};

Expand Down Expand Up @@ -477,12 +481,11 @@ export class AgentActivity implements RecognitionHooks {
turnDetector: typeof this.turnDetection === 'string' ? undefined : this.turnDetection,
turnDetectionMode: this.turnDetectionMode,
interruptionDetection: this.interruptionDetector,
minEndpointingDelay:
this.agent.turnHandling?.endpointing?.minDelay ??
this.agentSession.sessionOptions.turnHandling.endpointing.minDelay,
maxEndpointingDelay:
this.agent.turnHandling?.endpointing?.maxDelay ??
this.agentSession.sessionOptions.turnHandling.endpointing.maxDelay,
// Ref: python livekit-agents/livekit/agents/voice/agent_activity.py - 321-328 lines
endpointing: createEndpointing({
...this.agentSession.sessionOptions.turnHandling.endpointing,
...stripUndefined(this.agent.turnHandling?.endpointing ?? {}),
}),
rootSpanContext: this.agentSession.rootSpanContext,
sttModel: this.stt?.label,
sttProvider: this.getSttProvider(),
Expand Down Expand Up @@ -922,12 +925,10 @@ export class AgentActivity implements RecognitionHooks {

if (!this.vad) {
this.agentSession._updateUserState('speaking');
if (this.isInterruptionDetectionEnabled && this.audioRecognition) {
this.audioRecognition.onStartOfOverlapSpeech(
0,
Date.now(),
this.agentSession._userSpeakingSpan,
);
this.interruptionDetected = false;
if (this.audioRecognition) {
// Ref: python livekit-agents/livekit/agents/voice/agent_activity.py - 1490-1498 lines
this.audioRecognition.onStartOfSpeech(Date.now(), 0, this.agentSession._userSpeakingSpan);
}
}

Expand All @@ -947,8 +948,13 @@ export class AgentActivity implements RecognitionHooks {
this.logger.info(ev, 'onInputSpeechStopped');

if (!this.vad) {
if (this.isInterruptionDetectionEnabled && this.audioRecognition) {
this.audioRecognition.onEndOfOverlapSpeech(Date.now(), this.agentSession._userSpeakingSpan);
if (this.audioRecognition) {
// Ref: python livekit-agents/livekit/agents/voice/agent_activity.py - 1508-1516 lines
this.audioRecognition.onEndOfSpeech(
Date.now(),
this.agentSession._userSpeakingSpan,
this.isInterruptionDetectionEnabled && !this.interruptionDetected,
);
}
this.agentSession._updateUserState('listening');
}
Expand Down Expand Up @@ -1030,11 +1036,12 @@ export class AgentActivity implements RecognitionHooks {
lastSpeakingTime: speechStartTime,
otelContext: otelContext.active(),
});
if (this.isInterruptionDetectionEnabled && this.audioRecognition) {
// Pass speechStartTime as the absolute startedAt timestamp.
this.audioRecognition.onStartOfOverlapSpeech(
ev.speechDuration,
this.interruptionDetected = false;
if (this.audioRecognition) {
// Ref: python livekit-agents/livekit/agents/voice/agent_activity.py - 1645-1656 lines
this.audioRecognition.onStartOfSpeech(
speechStartTime,
ev?.speechDuration ?? 0,
this.agentSession._userSpeakingSpan,
);
}
Expand All @@ -1046,11 +1053,12 @@ export class AgentActivity implements RecognitionHooks {
// Subtract both silenceDuration and inferenceDuration to correct for VAD model latency.
speechEndTime = speechEndTime - ev.silenceDuration - ev.inferenceDuration;
}
if (this.isInterruptionDetectionEnabled && this.audioRecognition) {
// Pass speechEndTime as the absolute endedAt timestamp.
this.audioRecognition.onEndOfOverlapSpeech(
if (this.audioRecognition) {
// Ref: python livekit-agents/livekit/agents/voice/agent_activity.py - 1665-1679 lines
this.audioRecognition.onEndOfSpeech(
speechEndTime,
this.agentSession._userSpeakingSpan,
this.isInterruptionDetectionEnabled && !this.interruptionDetected,
);
}
this.agentSession._updateUserState('listening', {
Expand Down Expand Up @@ -1127,6 +1135,7 @@ export class AgentActivity implements RecognitionHooks {
}

onInterruption(ev: OverlappingSpeechEvent) {
this.interruptionDetected = true;
this.restoreInterruptionByAudioActivity();
this.interruptByAudioActivity();
if (this.audioRecognition) {
Expand Down Expand Up @@ -1838,7 +1847,7 @@ export class AgentActivity implements RecognitionHooks {
otelContext: speechHandle._agentTurnContext,
});
if (this.isInterruptionDetectionEnabled && this.audioRecognition) {
this.audioRecognition.onStartOfAgentSpeech();
this.audioRecognition.onStartOfAgentSpeech(replyStartedSpeakingAt);
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟡 Missing // Ref: Python reference comment on onStartOfAgentSpeech call-site change in ttsTask

Per CLAUDE.md rules, every JS change that corresponds to a Python change must carry an inline // Ref: comment. The call to this.audioRecognition.onStartOfAgentSpeech(replyStartedSpeakingAt) was changed from the no-arg form this.audioRecognition.onStartOfAgentSpeech() to pass a timestamp — a Python-ported behavioral change — but has no // Ref: comment.

Open in Devin Review

Was this helpful? React with 👍 or 👎 to provide feedback.

this.isInterruptionByAudioActivityEnabled = false;
}
};
Expand Down Expand Up @@ -2114,7 +2123,7 @@ export class AgentActivity implements RecognitionHooks {
otelContext: speechHandle._agentTurnContext,
});
if (this.isInterruptionDetectionEnabled && this.audioRecognition) {
this.audioRecognition.onStartOfAgentSpeech();
this.audioRecognition.onStartOfAgentSpeech(agentStartedSpeakingAt);
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟡 Missing // Ref: Python reference comment on onStartOfAgentSpeech call-site change in _pipelineReplyTaskImpl

Per CLAUDE.md rules, every JS change that corresponds to a Python change must carry an inline // Ref: comment. The call to this.audioRecognition.onStartOfAgentSpeech(agentStartedSpeakingAt) was changed from the no-arg form to pass a timestamp — a Python-ported behavioral change — but has no // Ref: comment.

Open in Devin Review

Was this helpful? React with 👍 or 👎 to provide feedback.

this.isInterruptionByAudioActivityEnabled = false;
}
};
Expand Down
Loading
Loading