From 873db81b0052fc4d2c981bf5f790fa4d485bd42d Mon Sep 17 00:00:00 2001 From: VinayGuthal Date: Thu, 25 Sep 2025 13:56:17 -0400 Subject: [PATCH 01/24] add interrupt support --- .../com/google/firebase/ai/type/LiveSession.kt | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt index a91d7e4aedf..191c9a49f8e 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt @@ -95,7 +95,8 @@ internal constructor( */ @RequiresPermission(RECORD_AUDIO) public suspend fun startAudioConversation( - functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)? = null + functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)? = null, + enableInterruptions: Boolean? = null, ) { val context = firebaseApp.applicationContext @@ -120,7 +121,7 @@ internal constructor( recordUserAudio() processModelResponses(functionCallHandler) - listenForModelPlayback() + listenForModelPlayback(enableInterruptions) } } @@ -375,14 +376,16 @@ internal constructor( * * Launched asynchronously on [scope]. */ - private fun listenForModelPlayback() { + private fun listenForModelPlayback(enableInterruptions: Boolean? = null) { scope.launch { while (isActive) { val playbackData = playBackQueue.poll() if (playbackData == null) { // The model playback queue is complete, so we can continue recording // TODO(b/408223520): Conditionally resume when param is added - audioHelper?.resumeRecording() + if (enableInterruptions != true) { + audioHelper?.resumeRecording() + } yield() } else { /** @@ -390,7 +393,9 @@ internal constructor( * no echo cancellation */ // TODO(b/408223520): Conditionally pause when param is added - audioHelper?.pauseRecording() + if (enableInterruptions != true) { + audioHelper?.pauseRecording() + } audioHelper?.playAudio(playbackData) } From f2ff92b9b2c3a65d03ba6d698e13825b74d43887 Mon Sep 17 00:00:00 2001 From: VinayGuthal Date: Mon, 6 Oct 2025 13:18:30 -0400 Subject: [PATCH 02/24] update --- firebase-ai/gradle.properties | 2 +- .../com/google/firebase/ai/common/util/android.kt | 12 ++++++++++++ .../com/google/firebase/ai/type/AudioHelper.kt | 2 +- .../com/google/firebase/ai/type/LiveSession.kt | 14 +++++++++++--- 4 files changed, 25 insertions(+), 5 deletions(-) diff --git a/firebase-ai/gradle.properties b/firebase-ai/gradle.properties index 794b7a23197..15e226a5aac 100644 --- a/firebase-ai/gradle.properties +++ b/firebase-ai/gradle.properties @@ -12,5 +12,5 @@ # See the License for the specific language governing permissions and # limitations under the License. -version=17.3.1 +version=99.9.9 latestReleasedVersion=17.3.0 diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/common/util/android.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/common/util/android.kt index 4d7a1e46097..d50bf5a549c 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/common/util/android.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/common/util/android.kt @@ -17,6 +17,7 @@ package com.google.firebase.ai.common.util import android.media.AudioRecord +import kotlinx.coroutines.delay import kotlinx.coroutines.flow.flow import kotlinx.coroutines.yield @@ -36,15 +37,26 @@ internal val AudioRecord.minBufferSize: Int internal fun AudioRecord.readAsFlow() = flow { val buffer = ByteArray(minBufferSize) + var startTime = System.currentTimeMillis() while (true) { if (recordingState != AudioRecord.RECORDSTATE_RECORDING) { + delay(10) yield() continue } + if (System.currentTimeMillis() - startTime >= 100) { + // This is the manual yield/pause point. + // Using delay(1) suspends the coroutine, freeing the thread + // for the dispatcher to run other tasks briefly. + delay(1) + yield() + startTime = System.currentTimeMillis() // Reset the timer + } val bytesRead = read(buffer, 0, buffer.size) if (bytesRead > 0) { emit(buffer.copyOf(bytesRead)) } + yield() } } diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/AudioHelper.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/AudioHelper.kt index 4db66ae6c3e..3100f6f9ff5 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/AudioHelper.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/AudioHelper.kt @@ -140,8 +140,8 @@ internal class AudioHelper( * Returns an empty flow if this [AudioHelper] has been [released][release]. */ fun listenToRecording(): Flow { + println("Released: $released") if (released) return emptyFlow() - resumeRecording() return recorder.readAsFlow() diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt index ecf4f8f0711..696fa1bb957 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt @@ -37,6 +37,7 @@ import java.util.concurrent.ConcurrentLinkedQueue import java.util.concurrent.atomic.AtomicBoolean import kotlin.coroutines.CoroutineContext import kotlinx.coroutines.CoroutineScope +import kotlinx.coroutines.Dispatchers import kotlinx.coroutines.cancel import kotlinx.coroutines.channels.Channel.Factory.UNLIMITED import kotlinx.coroutines.flow.Flow @@ -174,9 +175,11 @@ internal constructor( response .getOrNull() ?.let { - JSON.decodeFromString( + val x = JSON.decodeFromString( it.readBytes().toString(Charsets.UTF_8) ) + println(x) + x } ?.let { emit(it.toPublic()) } yield() @@ -230,6 +233,7 @@ internal constructor( BidiGenerateContentToolResponseSetup(functionList.map { it.toInternalFunctionCall() }) .toInternal() ) + println("Sending function response $jsonString") session.send(Frame.Text(jsonString)) } } @@ -249,6 +253,7 @@ internal constructor( Json.encodeToString( BidiGenerateContentRealtimeInputSetup(mediaChunks.map { (it.toInternal()) }).toInternal() ) + println("Sending $jsonString") session.send(Frame.Text(jsonString)) } } @@ -305,7 +310,7 @@ internal constructor( ?.accumulateUntil(MIN_BUFFER_SIZE) ?.onEach { sendMediaStream(listOf(MediaData(it, "audio/pcm"))) } ?.catch { throw FirebaseAIException.from(it) } - ?.launchIn(scope) + ?.launchIn(CoroutineScope(Dispatchers.IO)) } /** @@ -333,6 +338,7 @@ internal constructor( } else if (functionCallHandler != null) { // It's fine to suspend here since you can't have a function call running concurrently // with an audio response + println("Model is attempting to send a function call response") sendFunctionResponse(it.functionCalls.map(functionCallHandler).toList()) } else { Log.w( @@ -348,11 +354,13 @@ internal constructor( ) } is LiveServerContent -> { + println("State of it's interruption: ${it.interrupted}") if (it.interrupted) { playBackQueue.clear() } else { val audioParts = it.content?.parts?.filterIsInstance().orEmpty() for (part in audioParts) { + println("Model receiving ${part.inlineData}") playBackQueue.add(part.inlineData) } } @@ -396,7 +404,7 @@ internal constructor( if (enableInterruptions != true) { audioHelper?.pauseRecording() } - + println("Model playing $playbackData") audioHelper?.playAudio(playbackData) } } From 80e444b1c0f232c38e68a33f70be2e9734ae53f0 Mon Sep 17 00:00:00 2001 From: VinayGuthal Date: Mon, 6 Oct 2025 15:38:01 -0400 Subject: [PATCH 03/24] add interrupt support --- firebase-ai/CHANGELOG.md | 2 + firebase-ai/api.txt | 6 ++- firebase-ai/gradle.properties | 2 +- .../google/firebase/ai/common/util/android.kt | 10 ---- .../firebase/ai/java/LiveSessionFutures.kt | 49 +++++++++++++++++++ .../google/firebase/ai/type/AudioHelper.kt | 1 - .../google/firebase/ai/type/LiveSession.kt | 18 +++---- 7 files changed, 63 insertions(+), 25 deletions(-) diff --git a/firebase-ai/CHANGELOG.md b/firebase-ai/CHANGELOG.md index 4432555a470..c6662411bd9 100644 --- a/firebase-ai/CHANGELOG.md +++ b/firebase-ai/CHANGELOG.md @@ -2,6 +2,8 @@ - [changed] **Breaking Change**: Removed the `candidateCount` option from `LiveGenerationConfig` - [changed] Added better error messages to `ServiceConnectionHandshakeFailedException` +- [changed] Added support for user interrupts for the `startAudioConversation` method in the + `LiveSession` class. # 17.3.0 diff --git a/firebase-ai/api.txt b/firebase-ai/api.txt index a390a14147e..576e61cf0e5 100644 --- a/firebase-ai/api.txt +++ b/firebase-ai/api.txt @@ -148,7 +148,9 @@ package com.google.firebase.ai.java { method public abstract com.google.common.util.concurrent.ListenableFuture sendFunctionResponse(java.util.List functionList); method public abstract com.google.common.util.concurrent.ListenableFuture sendMediaStream(java.util.List mediaChunks); method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(); - method public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(kotlin.jvm.functions.Function1? functionCallHandler); + method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(boolean enableInterruptions); + method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(kotlin.jvm.functions.Function1? functionCallHandler); + method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(kotlin.jvm.functions.Function1? functionCallHandler, boolean enableInterruptions); method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture stopAudioConversation(); method public abstract void stopReceiving(); field public static final com.google.firebase.ai.java.LiveSessionFutures.Companion Companion; @@ -889,7 +891,7 @@ package com.google.firebase.ai.type { method public suspend Object? send(String text, kotlin.coroutines.Continuation); method public suspend Object? sendFunctionResponse(java.util.List functionList, kotlin.coroutines.Continuation); method public suspend Object? sendMediaStream(java.util.List mediaChunks, kotlin.coroutines.Continuation); - method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public suspend Object? startAudioConversation(kotlin.jvm.functions.Function1? functionCallHandler = null, kotlin.coroutines.Continuation); + method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public suspend Object? startAudioConversation(kotlin.jvm.functions.Function1? functionCallHandler = null, Boolean? enableInterruptions = null, kotlin.coroutines.Continuation); method public void stopAudioConversation(); method public void stopReceiving(); } diff --git a/firebase-ai/gradle.properties b/firebase-ai/gradle.properties index 15e226a5aac..a61baee5a19 100644 --- a/firebase-ai/gradle.properties +++ b/firebase-ai/gradle.properties @@ -12,5 +12,5 @@ # See the License for the specific language governing permissions and # limitations under the License. -version=99.9.9 +version=17.4.0 latestReleasedVersion=17.3.0 diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/common/util/android.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/common/util/android.kt index d50bf5a549c..e299d3164f5 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/common/util/android.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/common/util/android.kt @@ -37,22 +37,12 @@ internal val AudioRecord.minBufferSize: Int internal fun AudioRecord.readAsFlow() = flow { val buffer = ByteArray(minBufferSize) - var startTime = System.currentTimeMillis() while (true) { if (recordingState != AudioRecord.RECORDSTATE_RECORDING) { delay(10) yield() continue } - if (System.currentTimeMillis() - startTime >= 100) { - // This is the manual yield/pause point. - // Using delay(1) suspends the coroutine, freeing the thread - // for the dispatcher to run other tasks briefly. - delay(1) - yield() - startTime = System.currentTimeMillis() // Reset the timer - } - val bytesRead = read(buffer, 0, buffer.size) if (bytesRead > 0) { emit(buffer.copyOf(bytesRead)) diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/java/LiveSessionFutures.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/java/LiveSessionFutures.kt index 1efa2dfedfc..a7c667834bd 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/java/LiveSessionFutures.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/java/LiveSessionFutures.kt @@ -47,6 +47,7 @@ public abstract class LiveSessionFutures internal constructor() { * @param functionCallHandler A callback function that is invoked whenever the model receives a * function call. */ + @RequiresPermission(RECORD_AUDIO) public abstract fun startAudioConversation( functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)? ): ListenableFuture @@ -58,6 +59,36 @@ public abstract class LiveSessionFutures internal constructor() { @RequiresPermission(RECORD_AUDIO) public abstract fun startAudioConversation(): ListenableFuture + /** + * Starts an audio conversation with the model, which can only be stopped using + * [stopAudioConversation] or [close]. + * + * @param enableInterruptions Boolean to enable user to interrupt the model. Setting this variable + * would allow the user to talk while the model is responding. + * + * **WARNING**: User interruption might not work reliably across all devices. + */ + @RequiresPermission(RECORD_AUDIO) + public abstract fun startAudioConversation(enableInterruptions: Boolean): ListenableFuture + + /** + * Starts an audio conversation with the model, which can only be stopped using + * [stopAudioConversation] or [close]. + * + * @param functionCallHandler A callback function that is invoked whenever the model receives a + * function call. + * + * @param enableInterruptions Boolean to enable user to interrupt the model. Setting this variable + * would allow the user to talk while the model is responding. + * + * **WARNING**: User interruption might not work reliably across all devices. + */ + @RequiresPermission(RECORD_AUDIO) + public abstract fun startAudioConversation( + functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)?, + enableInterruptions: Boolean + ): ListenableFuture + /** * Stops the audio conversation with the Gemini Server. * @@ -169,6 +200,24 @@ public abstract class LiveSessionFutures internal constructor() { override fun startAudioConversation() = SuspendToFutureAdapter.launchFuture { session.startAudioConversation() } + @RequiresPermission(RECORD_AUDIO) + override fun startAudioConversation(enableInterruptions: Boolean) = + SuspendToFutureAdapter.launchFuture { + session.startAudioConversation(enableInterruptions = enableInterruptions) + } + + @RequiresPermission(RECORD_AUDIO) + override fun startAudioConversation( + functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)?, + enableInterruptions: Boolean + ) = + SuspendToFutureAdapter.launchFuture { + session.startAudioConversation( + functionCallHandler, + enableInterruptions = enableInterruptions + ) + } + override fun stopAudioConversation() = SuspendToFutureAdapter.launchFuture { session.stopAudioConversation() } diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/AudioHelper.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/AudioHelper.kt index 3100f6f9ff5..08e90fc8538 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/AudioHelper.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/AudioHelper.kt @@ -140,7 +140,6 @@ internal class AudioHelper( * Returns an empty flow if this [AudioHelper] has been [released][release]. */ fun listenToRecording(): Flow { - println("Released: $released") if (released) return emptyFlow() resumeRecording() diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt index 696fa1bb957..616569f1026 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt @@ -37,7 +37,6 @@ import java.util.concurrent.ConcurrentLinkedQueue import java.util.concurrent.atomic.AtomicBoolean import kotlin.coroutines.CoroutineContext import kotlinx.coroutines.CoroutineScope -import kotlinx.coroutines.Dispatchers import kotlinx.coroutines.cancel import kotlinx.coroutines.channels.Channel.Factory.UNLIMITED import kotlinx.coroutines.flow.Flow @@ -93,6 +92,11 @@ internal constructor( * @param functionCallHandler A callback function that is invoked whenever the model receives a * function call. The [FunctionResponsePart] that the callback function returns will be * automatically sent to the model. + * + * @param enableInterruptions Boolean to enable user to interrupt the model. Setting this variable + * would allow the user to talk while the model is responding. + * + * **WARNING**: User interruption might not work reliably across all devices. */ @RequiresPermission(RECORD_AUDIO) public suspend fun startAudioConversation( @@ -175,11 +179,9 @@ internal constructor( response .getOrNull() ?.let { - val x = JSON.decodeFromString( + JSON.decodeFromString( it.readBytes().toString(Charsets.UTF_8) ) - println(x) - x } ?.let { emit(it.toPublic()) } yield() @@ -233,7 +235,6 @@ internal constructor( BidiGenerateContentToolResponseSetup(functionList.map { it.toInternalFunctionCall() }) .toInternal() ) - println("Sending function response $jsonString") session.send(Frame.Text(jsonString)) } } @@ -253,7 +254,6 @@ internal constructor( Json.encodeToString( BidiGenerateContentRealtimeInputSetup(mediaChunks.map { (it.toInternal()) }).toInternal() ) - println("Sending $jsonString") session.send(Frame.Text(jsonString)) } } @@ -310,7 +310,7 @@ internal constructor( ?.accumulateUntil(MIN_BUFFER_SIZE) ?.onEach { sendMediaStream(listOf(MediaData(it, "audio/pcm"))) } ?.catch { throw FirebaseAIException.from(it) } - ?.launchIn(CoroutineScope(Dispatchers.IO)) + ?.launchIn(scope) } /** @@ -338,7 +338,6 @@ internal constructor( } else if (functionCallHandler != null) { // It's fine to suspend here since you can't have a function call running concurrently // with an audio response - println("Model is attempting to send a function call response") sendFunctionResponse(it.functionCalls.map(functionCallHandler).toList()) } else { Log.w( @@ -354,13 +353,11 @@ internal constructor( ) } is LiveServerContent -> { - println("State of it's interruption: ${it.interrupted}") if (it.interrupted) { playBackQueue.clear() } else { val audioParts = it.content?.parts?.filterIsInstance().orEmpty() for (part in audioParts) { - println("Model receiving ${part.inlineData}") playBackQueue.add(part.inlineData) } } @@ -404,7 +401,6 @@ internal constructor( if (enableInterruptions != true) { audioHelper?.pauseRecording() } - println("Model playing $playbackData") audioHelper?.playAudio(playbackData) } } From 703177f30f482ec679637c23caf7bfe72578968e Mon Sep 17 00:00:00 2001 From: VinayGuthal Date: Tue, 7 Oct 2025 11:56:09 -0400 Subject: [PATCH 04/24] update --- firebase-ai/api.txt | 3 +- .../google/firebase/ai/common/util/android.kt | 3 +- .../firebase/ai/java/LiveSessionFutures.kt | 14 ++++++---- .../google/firebase/ai/type/LiveSession.kt | 28 +++++++++++++++---- 4 files changed, 34 insertions(+), 14 deletions(-) diff --git a/firebase-ai/api.txt b/firebase-ai/api.txt index d34060486f8..f8df1f045bc 100644 --- a/firebase-ai/api.txt +++ b/firebase-ai/api.txt @@ -893,7 +893,8 @@ package com.google.firebase.ai.type { method public suspend Object? send(String text, kotlin.coroutines.Continuation); method public suspend Object? sendFunctionResponse(java.util.List functionList, kotlin.coroutines.Continuation); method public suspend Object? sendMediaStream(java.util.List mediaChunks, kotlin.coroutines.Continuation); - method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public suspend Object? startAudioConversation(kotlin.jvm.functions.Function1? functionCallHandler = null, Boolean? enableInterruptions = null, kotlin.coroutines.Continuation); + method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public suspend Object? startAudioConversation(kotlin.jvm.functions.Function1? functionCallHandler = null, boolean enableInterruptions = false, kotlin.coroutines.Continuation); + method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public suspend Object? startAudioConversation(kotlin.jvm.functions.Function1? functionCallHandler = null, kotlin.coroutines.Continuation); method public void stopAudioConversation(); method public void stopReceiving(); } diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/common/util/android.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/common/util/android.kt index e299d3164f5..e8ca4fcac08 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/common/util/android.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/common/util/android.kt @@ -17,6 +17,7 @@ package com.google.firebase.ai.common.util import android.media.AudioRecord +import kotlin.time.Duration.Companion.milliseconds import kotlinx.coroutines.delay import kotlinx.coroutines.flow.flow import kotlinx.coroutines.yield @@ -39,7 +40,7 @@ internal fun AudioRecord.readAsFlow() = flow { while (true) { if (recordingState != AudioRecord.RECORDSTATE_RECORDING) { - delay(10) + delay(10.milliseconds) yield() continue } diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/java/LiveSessionFutures.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/java/LiveSessionFutures.kt index a7c667834bd..a9615ac2afb 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/java/LiveSessionFutures.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/java/LiveSessionFutures.kt @@ -63,10 +63,11 @@ public abstract class LiveSessionFutures internal constructor() { * Starts an audio conversation with the model, which can only be stopped using * [stopAudioConversation] or [close]. * - * @param enableInterruptions Boolean to enable user to interrupt the model. Setting this variable - * would allow the user to talk while the model is responding. + * @param enableInterruptions If enabled, allows the user to speak over or interrupt the model's + * ongoing reply. * - * **WARNING**: User interruption might not work reliably across all devices. + * **WARNING**: The user interruption feature relies on device-specific support, and may not be + * consistently available. */ @RequiresPermission(RECORD_AUDIO) public abstract fun startAudioConversation(enableInterruptions: Boolean): ListenableFuture @@ -78,10 +79,11 @@ public abstract class LiveSessionFutures internal constructor() { * @param functionCallHandler A callback function that is invoked whenever the model receives a * function call. * - * @param enableInterruptions Boolean to enable user to interrupt the model. Setting this variable - * would allow the user to talk while the model is responding. + * @param enableInterruptions If enabled, allows the user to speak over or interrupt the model's + * ongoing reply. * - * **WARNING**: User interruption might not work reliably across all devices. + * **WARNING**: The user interruption feature relies on device-specific support, and may not be + * consistently available. */ @RequiresPermission(RECORD_AUDIO) public abstract fun startAudioConversation( diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt index 616569f1026..c703cd959c3 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt @@ -92,16 +92,32 @@ internal constructor( * @param functionCallHandler A callback function that is invoked whenever the model receives a * function call. The [FunctionResponsePart] that the callback function returns will be * automatically sent to the model. + */ + @RequiresPermission(RECORD_AUDIO) + public suspend fun startAudioConversation( + functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)? = null + ) { + startAudioConversation(functionCallHandler, false) + } + + /** + * Starts an audio conversation with the model, which can only be stopped using + * [stopAudioConversation] or [close]. * - * @param enableInterruptions Boolean to enable user to interrupt the model. Setting this variable - * would allow the user to talk while the model is responding. + * @param functionCallHandler A callback function that is invoked whenever the model receives a + * function call. The [FunctionResponsePart] that the callback function returns will be + * automatically sent to the model. + * + * @param enableInterruptions If enabled, allows the user to speak over or interrupt the model's + * ongoing reply. * - * **WARNING**: User interruption might not work reliably across all devices. + * **WARNING**: The user interruption feature relies on device-specific support, and may not be + * consistently available. */ @RequiresPermission(RECORD_AUDIO) public suspend fun startAudioConversation( functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)? = null, - enableInterruptions: Boolean? = null, + enableInterruptions: Boolean = false, ) { val context = firebaseApp.applicationContext @@ -381,14 +397,14 @@ internal constructor( * * Launched asynchronously on [scope]. */ - private fun listenForModelPlayback(enableInterruptions: Boolean? = null) { + private fun listenForModelPlayback(enableInterruptions: Boolean = false) { scope.launch { while (isActive) { val playbackData = playBackQueue.poll() if (playbackData == null) { // The model playback queue is complete, so we can continue recording // TODO(b/408223520): Conditionally resume when param is added - if (enableInterruptions != true) { + if (!enableInterruptions) { audioHelper?.resumeRecording() } yield() From 11d96a51f13f3c42e32e11661181da0c093b0480 Mon Sep 17 00:00:00 2001 From: VinayGuthal Date: Tue, 7 Oct 2025 11:58:07 -0400 Subject: [PATCH 05/24] add comments --- .../main/kotlin/com/google/firebase/ai/common/util/android.kt | 1 + 1 file changed, 1 insertion(+) diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/common/util/android.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/common/util/android.kt index e8ca4fcac08..6179c8b52e9 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/common/util/android.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/common/util/android.kt @@ -40,6 +40,7 @@ internal fun AudioRecord.readAsFlow() = flow { while (true) { if (recordingState != AudioRecord.RECORDSTATE_RECORDING) { + // TODO(vguthal): Investigate if both yield and delay are required. delay(10.milliseconds) yield() continue From 61090c4c476e71a4b42a5bfafc125b22cdd2e736 Mon Sep 17 00:00:00 2001 From: Vinay Guthal Date: Tue, 7 Oct 2025 12:02:41 -0400 Subject: [PATCH 06/24] Apply suggestion from @rlazo Co-authored-by: Rodrigo Lazo --- firebase-ai/CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/firebase-ai/CHANGELOG.md b/firebase-ai/CHANGELOG.md index 241bd98356e..a2b6c34dd75 100644 --- a/firebase-ai/CHANGELOG.md +++ b/firebase-ai/CHANGELOG.md @@ -2,7 +2,7 @@ - [changed] **Breaking Change**: Removed the `candidateCount` option from `LiveGenerationConfig` - [changed] Added support for user interrupts for the `startAudioConversation` method in the - `LiveSession` class. + `LiveSession` class. (#7413) - [changed] Added support for the URL context tool, which allows the model to access content from provided public web URLs to inform and enhance its responses. (#7382) - [changed] Added better error messages to `ServiceConnectionHandshakeFailedException` (#7412) From 50691eeda18005d8db4d295480010faeb8538fac Mon Sep 17 00:00:00 2001 From: VinayGuthal Date: Tue, 7 Oct 2025 15:36:20 -0400 Subject: [PATCH 07/24] test --- firebase-ai/gradle.properties | 2 +- .../kotlin/com/google/firebase/ai/type/AudioHelper.kt | 2 +- .../kotlin/com/google/firebase/ai/type/LiveSession.kt | 10 +++++++--- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/firebase-ai/gradle.properties b/firebase-ai/gradle.properties index a61baee5a19..15e226a5aac 100644 --- a/firebase-ai/gradle.properties +++ b/firebase-ai/gradle.properties @@ -12,5 +12,5 @@ # See the License for the specific language governing permissions and # limitations under the License. -version=17.4.0 +version=99.9.9 latestReleasedVersion=17.3.0 diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/AudioHelper.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/AudioHelper.kt index 08e90fc8538..edeb7c332f7 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/AudioHelper.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/AudioHelper.kt @@ -162,7 +162,7 @@ internal class AudioHelper( fun build(): AudioHelper { val playbackTrack = AudioTrack( - AudioAttributes.Builder().setUsage(AudioAttributes.USAGE_VOICE_COMMUNICATION).build(), + AudioAttributes.Builder().setUsage(AudioAttributes.USAGE_MEDIA).setContentType(AudioAttributes.CONTENT_TYPE_SPEECH).build(), AudioFormat.Builder() .setSampleRate(24000) .setChannelMask(AudioFormat.CHANNEL_OUT_MONO) diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt index c703cd959c3..3ee230a5a2a 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt @@ -37,6 +37,7 @@ import java.util.concurrent.ConcurrentLinkedQueue import java.util.concurrent.atomic.AtomicBoolean import kotlin.coroutines.CoroutineContext import kotlinx.coroutines.CoroutineScope +import kotlinx.coroutines.Dispatchers import kotlinx.coroutines.cancel import kotlinx.coroutines.channels.Channel.Factory.UNLIMITED import kotlinx.coroutines.flow.Flow @@ -270,6 +271,7 @@ internal constructor( Json.encodeToString( BidiGenerateContentRealtimeInputSetup(mediaChunks.map { (it.toInternal()) }).toInternal() ) + println("Sending $jsonString") session.send(Frame.Text(jsonString)) } } @@ -372,6 +374,7 @@ internal constructor( if (it.interrupted) { playBackQueue.clear() } else { + println("Sending audio parts") val audioParts = it.content?.parts?.filterIsInstance().orEmpty() for (part in audioParts) { playBackQueue.add(part.inlineData) @@ -387,7 +390,7 @@ internal constructor( } } } - .launchIn(scope) + .launchIn(CoroutineScope(Dispatchers.IO)) } /** @@ -398,7 +401,7 @@ internal constructor( * Launched asynchronously on [scope]. */ private fun listenForModelPlayback(enableInterruptions: Boolean = false) { - scope.launch { + CoroutineScope(Dispatchers.IO).launch { while (isActive) { val playbackData = playBackQueue.poll() if (playbackData == null) { @@ -414,9 +417,10 @@ internal constructor( * no echo cancellation */ // TODO(b/408223520): Conditionally pause when param is added - if (enableInterruptions != true) { + if (!enableInterruptions) { audioHelper?.pauseRecording() } + println("Playing audio") audioHelper?.playAudio(playbackData) } } From 9a5e01af596f157221d928416084f23b9569f628 Mon Sep 17 00:00:00 2001 From: Daymon Date: Wed, 15 Oct 2025 13:11:56 -0500 Subject: [PATCH 08/24] Use a callback flow --- .../kotlin/com/google/firebase/ai/common/util/android.kt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/common/util/android.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/common/util/android.kt index 6179c8b52e9..a5d3e892a73 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/common/util/android.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/common/util/android.kt @@ -19,6 +19,7 @@ package com.google.firebase.ai.common.util import android.media.AudioRecord import kotlin.time.Duration.Companion.milliseconds import kotlinx.coroutines.delay +import kotlinx.coroutines.flow.callbackFlow import kotlinx.coroutines.flow.flow import kotlinx.coroutines.yield @@ -35,7 +36,7 @@ internal val AudioRecord.minBufferSize: Int * * Will yield when this instance is not recording. */ -internal fun AudioRecord.readAsFlow() = flow { +internal fun AudioRecord.readAsFlow() = callbackFlow { val buffer = ByteArray(minBufferSize) while (true) { @@ -47,7 +48,7 @@ internal fun AudioRecord.readAsFlow() = flow { } val bytesRead = read(buffer, 0, buffer.size) if (bytesRead > 0) { - emit(buffer.copyOf(bytesRead)) + send(buffer.copyOf(bytesRead)) } yield() } From f14a46175dcb1bb80b10e5ced518730bf418ab2c Mon Sep 17 00:00:00 2001 From: Daymon Date: Wed, 15 Oct 2025 13:12:07 -0500 Subject: [PATCH 09/24] Listen for cancellation --- .../main/kotlin/com/google/firebase/ai/common/util/android.kt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/common/util/android.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/common/util/android.kt index a5d3e892a73..43dcf4cc115 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/common/util/android.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/common/util/android.kt @@ -21,6 +21,7 @@ import kotlin.time.Duration.Companion.milliseconds import kotlinx.coroutines.delay import kotlinx.coroutines.flow.callbackFlow import kotlinx.coroutines.flow.flow +import kotlinx.coroutines.isActive import kotlinx.coroutines.yield /** @@ -39,7 +40,7 @@ internal val AudioRecord.minBufferSize: Int internal fun AudioRecord.readAsFlow() = callbackFlow { val buffer = ByteArray(minBufferSize) - while (true) { + while (isActive) { if (recordingState != AudioRecord.RECORDSTATE_RECORDING) { // TODO(vguthal): Investigate if both yield and delay are required. delay(10.milliseconds) From 6d2428179ea5f63149c50bc4a251b946152102c3 Mon Sep 17 00:00:00 2001 From: Daymon Date: Wed, 15 Oct 2025 13:12:26 -0500 Subject: [PATCH 10/24] Change print logging --- .../src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt index f9507f5e8d3..c72d752f0f5 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt @@ -374,7 +374,7 @@ internal constructor( if (it.interrupted) { playBackQueue.clear() } else { - println("Sending audio parts") + println("Queuing audio parts from model") val audioParts = it.content?.parts?.filterIsInstance().orEmpty() for (part in audioParts) { playBackQueue.add(part.inlineData) From 6ac89dc624b0d20451c58fd5c9ecff761719b818 Mon Sep 17 00:00:00 2001 From: Daymon Date: Wed, 15 Oct 2025 13:12:35 -0500 Subject: [PATCH 11/24] Log when audio data is played --- .../src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt | 1 + 1 file changed, 1 insertion(+) diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt index c72d752f0f5..7d12ffcfb03 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt @@ -412,6 +412,7 @@ internal constructor( } yield() } else { + println("Playing audio data") /** * We pause the recording while the model is speaking to avoid interrupting it because of * no echo cancellation From fafcd3e4cecc0dab6f05408ec8e9e42f2dca471a Mon Sep 17 00:00:00 2001 From: Daymon Date: Wed, 15 Oct 2025 13:13:00 -0500 Subject: [PATCH 12/24] Revert IO change --- .../main/kotlin/com/google/firebase/ai/type/LiveSession.kt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt index 7d12ffcfb03..bc69ec33d9b 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt @@ -390,7 +390,7 @@ internal constructor( } } } - .launchIn(CoroutineScope(Dispatchers.IO)) + .launchIn(scope) } /** @@ -401,7 +401,7 @@ internal constructor( * Launched asynchronously on [scope]. */ private fun listenForModelPlayback(enableInterruptions: Boolean = false) { - CoroutineScope(Dispatchers.IO).launch { + scope.launch { while (isActive) { val playbackData = playBackQueue.poll() if (playbackData == null) { From d597d48d754cbdc6e2181370ed98f96c6231920c Mon Sep 17 00:00:00 2001 From: Daymon Date: Wed, 15 Oct 2025 13:13:30 -0500 Subject: [PATCH 13/24] Add name to coroutine --- .../main/kotlin/com/google/firebase/ai/type/LiveSession.kt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt index bc69ec33d9b..f299a8c93db 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt @@ -33,6 +33,7 @@ import io.ktor.client.plugins.websocket.DefaultClientWebSocketSession import io.ktor.websocket.Frame import io.ktor.websocket.close import io.ktor.websocket.readBytes +import kotlinx.coroutines.CoroutineName import java.util.concurrent.ConcurrentLinkedQueue import java.util.concurrent.atomic.AtomicBoolean import kotlin.coroutines.CoroutineContext @@ -137,8 +138,8 @@ internal constructor( ) return@catchAsync } - - scope = CoroutineScope(blockingDispatcher + childJob()) + // TODO: maybe it should be THREAD_PRIORITY_AUDIO anyways for playback and recording (not network though) + scope = CoroutineScope(blockingDispatcher + childJob() + CoroutineName("LiveSession Scope")) audioHelper = AudioHelper.build() recordUserAudio() From ace97d235e1219c4892d2cc2e08892fd274be7d5 Mon Sep 17 00:00:00 2001 From: Daymon Date: Wed, 15 Oct 2025 13:13:51 -0500 Subject: [PATCH 14/24] Use delay instead of yield --- .../kotlin/com/google/firebase/ai/common/util/android.kt | 6 ++---- .../main/kotlin/com/google/firebase/ai/type/LiveSession.kt | 3 ++- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/common/util/android.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/common/util/android.kt index 43dcf4cc115..e9b1736977c 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/common/util/android.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/common/util/android.kt @@ -42,15 +42,13 @@ internal fun AudioRecord.readAsFlow() = callbackFlow { while (isActive) { if (recordingState != AudioRecord.RECORDSTATE_RECORDING) { - // TODO(vguthal): Investigate if both yield and delay are required. - delay(10.milliseconds) - yield() + delay(0) continue } val bytesRead = read(buffer, 0, buffer.size) if (bytesRead > 0) { send(buffer.copyOf(bytesRead)) } - yield() + delay(0) } } diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt index f299a8c93db..104cddfca2f 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt @@ -41,6 +41,7 @@ import kotlinx.coroutines.CoroutineScope import kotlinx.coroutines.Dispatchers import kotlinx.coroutines.cancel import kotlinx.coroutines.channels.Channel.Factory.UNLIMITED +import kotlinx.coroutines.delay import kotlinx.coroutines.flow.Flow import kotlinx.coroutines.flow.buffer import kotlinx.coroutines.flow.catch @@ -411,7 +412,7 @@ internal constructor( if (!enableInterruptions) { audioHelper?.resumeRecording() } - yield() + delay(0) } else { println("Playing audio data") /** From 031c38d6ffe0f9e53b6e0a9f186d2806fd7dac90 Mon Sep 17 00:00:00 2001 From: Daymon Date: Wed, 15 Oct 2025 13:14:00 -0500 Subject: [PATCH 15/24] Add delay to sending audio data --- .../main/kotlin/com/google/firebase/ai/type/LiveSession.kt | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt index 104cddfca2f..3e3cc6063eb 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt @@ -328,7 +328,10 @@ internal constructor( ?.listenToRecording() ?.buffer(UNLIMITED) ?.accumulateUntil(MIN_BUFFER_SIZE) - ?.onEach { sendMediaStream(listOf(MediaData(it, "audio/pcm"))) } + ?.onEach { + sendMediaStream(listOf(MediaData(it, "audio/pcm"))) + delay(0) + } ?.catch { throw FirebaseAIException.from(it) } ?.launchIn(scope) } From e7dd7fcee5687ccd37122147084aaa7cf812fada Mon Sep 17 00:00:00 2001 From: Daymon Date: Wed, 15 Oct 2025 13:14:10 -0500 Subject: [PATCH 16/24] Bump coroutines --- gradle/libs.versions.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 74be10aa2ad..9f760b20104 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -16,7 +16,7 @@ benchmarkMacro = "1.3.4" browser = "1.3.0" cardview = "1.0.0" constraintlayout = "2.1.4" -coroutines = "1.9.0" +coroutines = "1.10.2" dagger = "2.51" # Don't bump above 2.51 as it causes a bug in AppDistro FeedbackSender JPEG code datastore = "1.1.7" dexmaker = "2.28.1" From ffa0ce9e05a6dbbeef9c8c22164b6cf97deedcd5 Mon Sep 17 00:00:00 2001 From: Daymon Date: Wed, 15 Oct 2025 13:35:38 -0500 Subject: [PATCH 17/24] Update missed yield --- .../main/kotlin/com/google/firebase/ai/type/LiveSession.kt | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt index 3e3cc6063eb..a5b169d12aa 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt @@ -51,7 +51,6 @@ import kotlinx.coroutines.flow.onCompletion import kotlinx.coroutines.flow.onEach import kotlinx.coroutines.isActive import kotlinx.coroutines.launch -import kotlinx.coroutines.yield import kotlinx.serialization.ExperimentalSerializationApi import kotlinx.serialization.Serializable import kotlinx.serialization.encodeToString @@ -122,7 +121,6 @@ internal constructor( functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)? = null, enableInterruptions: Boolean = false, ) { - val context = firebaseApp.applicationContext if ( ContextCompat.checkSelfPermission(context, RECORD_AUDIO) != PackageManager.PERMISSION_GRANTED @@ -203,7 +201,7 @@ internal constructor( ) } ?.let { emit(it.toPublic()) } - yield() + delay(0) } } .onCompletion { stopAudioConversation() } From 082c51081e65485d49d827644ae5ab791615dbe7 Mon Sep 17 00:00:00 2001 From: VinayGuthal Date: Thu, 16 Oct 2025 11:18:02 -0400 Subject: [PATCH 18/24] update --- firebase-ai/gradle.properties | 2 +- .../kotlin/com/google/firebase/ai/common/util/android.kt | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/firebase-ai/gradle.properties b/firebase-ai/gradle.properties index 15e226a5aac..c5c1aad6f62 100644 --- a/firebase-ai/gradle.properties +++ b/firebase-ai/gradle.properties @@ -12,5 +12,5 @@ # See the License for the specific language governing permissions and # limitations under the License. -version=99.9.9 +version=99.9.0 latestReleasedVersion=17.3.0 diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/common/util/android.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/common/util/android.kt index e9b1736977c..c020e94f415 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/common/util/android.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/common/util/android.kt @@ -37,17 +37,17 @@ internal val AudioRecord.minBufferSize: Int * * Will yield when this instance is not recording. */ -internal fun AudioRecord.readAsFlow() = callbackFlow { +internal fun AudioRecord.readAsFlow() = flow { val buffer = ByteArray(minBufferSize) - while (isActive) { + while (true) { if (recordingState != AudioRecord.RECORDSTATE_RECORDING) { delay(0) continue } val bytesRead = read(buffer, 0, buffer.size) if (bytesRead > 0) { - send(buffer.copyOf(bytesRead)) + emit(buffer.copyOf(bytesRead)) } delay(0) } From ecbda54a41f924749d76363198efa0cba185b61c Mon Sep 17 00:00:00 2001 From: Daymon <17409137+daymxn@users.noreply.github.com> Date: Thu, 16 Oct 2025 14:07:30 -0500 Subject: [PATCH 19/24] fix(ai): Add audio dispatcher (#7483) Adds an audio dispatcher for dispatching the recording of the microphone and the playback of audio to separate threads with elevated priorities. The threads are also marked with `detectNetwork` to catch improper usage. This should help avoid weird deadlocks with coroutines and provide a smoother recording/playback experience in apps with higher thread traffic. --- .../google/firebase/ai/common/util/android.kt | 4 - .../google/firebase/ai/type/AudioHelper.kt | 5 +- .../google/firebase/ai/type/LiveSession.kt | 84 +++++++++++++++---- 3 files changed, 74 insertions(+), 19 deletions(-) diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/common/util/android.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/common/util/android.kt index c020e94f415..bb1d28e9746 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/common/util/android.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/common/util/android.kt @@ -17,12 +17,8 @@ package com.google.firebase.ai.common.util import android.media.AudioRecord -import kotlin.time.Duration.Companion.milliseconds import kotlinx.coroutines.delay -import kotlinx.coroutines.flow.callbackFlow import kotlinx.coroutines.flow.flow -import kotlinx.coroutines.isActive -import kotlinx.coroutines.yield /** * The minimum buffer size for this instance. diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/AudioHelper.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/AudioHelper.kt index edeb7c332f7..06b4a3efe25 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/AudioHelper.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/AudioHelper.kt @@ -162,7 +162,10 @@ internal class AudioHelper( fun build(): AudioHelper { val playbackTrack = AudioTrack( - AudioAttributes.Builder().setUsage(AudioAttributes.USAGE_MEDIA).setContentType(AudioAttributes.CONTENT_TYPE_SPEECH).build(), + AudioAttributes.Builder() + .setUsage(AudioAttributes.USAGE_MEDIA) + .setContentType(AudioAttributes.CONTENT_TYPE_SPEECH) + .build(), AudioFormat.Builder() .setSampleRate(24000) .setChannelMask(AudioFormat.CHANNEL_OUT_MONO) diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt index a5b169d12aa..6cc4ef2c4b4 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt @@ -17,12 +17,17 @@ package com.google.firebase.ai.type import android.Manifest.permission.RECORD_AUDIO +import android.annotation.SuppressLint import android.content.pm.PackageManager import android.media.AudioFormat import android.media.AudioTrack +import android.os.Process +import android.os.StrictMode +import android.os.StrictMode.ThreadPolicy import android.util.Log import androidx.annotation.RequiresPermission import androidx.core.content.ContextCompat +import com.google.firebase.BuildConfig import com.google.firebase.FirebaseApp import com.google.firebase.ai.common.JSON import com.google.firebase.ai.common.util.CancelledCoroutineScope @@ -33,12 +38,15 @@ import io.ktor.client.plugins.websocket.DefaultClientWebSocketSession import io.ktor.websocket.Frame import io.ktor.websocket.close import io.ktor.websocket.readBytes -import kotlinx.coroutines.CoroutineName import java.util.concurrent.ConcurrentLinkedQueue +import java.util.concurrent.Executors +import java.util.concurrent.ThreadFactory import java.util.concurrent.atomic.AtomicBoolean +import java.util.concurrent.atomic.AtomicLong import kotlin.coroutines.CoroutineContext +import kotlinx.coroutines.CoroutineName import kotlinx.coroutines.CoroutineScope -import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.asCoroutineDispatcher import kotlinx.coroutines.cancel import kotlinx.coroutines.channels.Channel.Factory.UNLIMITED import kotlinx.coroutines.delay @@ -46,6 +54,7 @@ import kotlinx.coroutines.flow.Flow import kotlinx.coroutines.flow.buffer import kotlinx.coroutines.flow.catch import kotlinx.coroutines.flow.flow +import kotlinx.coroutines.flow.flowOn import kotlinx.coroutines.flow.launchIn import kotlinx.coroutines.flow.onCompletion import kotlinx.coroutines.flow.onEach @@ -67,11 +76,21 @@ internal constructor( private val firebaseApp: FirebaseApp, ) { /** - * Coroutine scope that we batch data on for [startAudioConversation]. + * Coroutine scope that we batch data on for network related behavior. + * + * Makes it easy to stop all the work with [stopAudioConversation] by just cancelling the scope. + */ + private var networkScope = CancelledCoroutineScope + + /** + * Coroutine scope that we batch data on for audio recording and playback. + * + * Separate from [networkScope] to ensure interchanging of dispatchers doesn't cause any deadlocks + * or issues. * * Makes it easy to stop all the work with [stopAudioConversation] by just cancelling the scope. */ - private var scope = CancelledCoroutineScope + private var audioScope = CancelledCoroutineScope /** * Playback audio data sent from the model. @@ -129,7 +148,7 @@ internal constructor( } FirebaseAIException.catchAsync { - if (scope.isActive) { + if (networkScope.isActive || audioScope.isActive) { Log.w( TAG, "startAudioConversation called after the recording has already started. " + @@ -137,8 +156,9 @@ internal constructor( ) return@catchAsync } - // TODO: maybe it should be THREAD_PRIORITY_AUDIO anyways for playback and recording (not network though) - scope = CoroutineScope(blockingDispatcher + childJob() + CoroutineName("LiveSession Scope")) + networkScope = + CoroutineScope(blockingDispatcher + childJob() + CoroutineName("LiveSession Network")) + audioScope = CoroutineScope(audioDispatcher + childJob() + CoroutineName("LiveSession Audio")) audioHelper = AudioHelper.build() recordUserAudio() @@ -158,7 +178,8 @@ internal constructor( FirebaseAIException.catch { if (!startedReceiving.getAndSet(false)) return@catch - scope.cancel() + networkScope.cancel() + audioScope.cancel() playBackQueue.clear() audioHelper?.release() @@ -228,7 +249,8 @@ internal constructor( FirebaseAIException.catch { if (!startedReceiving.getAndSet(false)) return@catch - scope.cancel() + networkScope.cancel() + audioScope.cancel() playBackQueue.clear() audioHelper?.release() @@ -325,13 +347,14 @@ internal constructor( audioHelper ?.listenToRecording() ?.buffer(UNLIMITED) + ?.flowOn(audioDispatcher) ?.accumulateUntil(MIN_BUFFER_SIZE) ?.onEach { sendMediaStream(listOf(MediaData(it, "audio/pcm"))) delay(0) } ?.catch { throw FirebaseAIException.from(it) } - ?.launchIn(scope) + ?.launchIn(networkScope) } /** @@ -339,7 +362,7 @@ internal constructor( * * Audio messages are added to [playBackQueue]. * - * Launched asynchronously on [scope]. + * Launched asynchronously on [networkScope]. * * @param functionCallHandler A callback function that is invoked whenever the server receives a * function call. @@ -393,7 +416,7 @@ internal constructor( } } } - .launchIn(scope) + .launchIn(networkScope) } /** @@ -401,10 +424,10 @@ internal constructor( * * Polls [playBackQueue] for data, and calls [AudioHelper.playAudio] when data is received. * - * Launched asynchronously on [scope]. + * Launched asynchronously on [networkScope]. */ private fun listenForModelPlayback(enableInterruptions: Boolean = false) { - scope.launch { + audioScope.launch { while (isActive) { val playbackData = playBackQueue.poll() if (playbackData == null) { @@ -490,5 +513,38 @@ internal constructor( AudioFormat.CHANNEL_OUT_MONO, AudioFormat.ENCODING_PCM_16BIT ) + @SuppressLint("ThreadPoolCreation") + val audioDispatcher = + Executors.newCachedThreadPool(AudioThreadFactory()).asCoroutineDispatcher() + } +} + +internal class AudioThreadFactory : ThreadFactory { + private val threadCount = AtomicLong() + private val policy: ThreadPolicy = audioPolicy() + + override fun newThread(task: Runnable?): Thread? { + val thread = + DEFAULT.newThread { + Process.setThreadPriority(Process.THREAD_PRIORITY_AUDIO) + StrictMode.setThreadPolicy(policy) + task?.run() + } + thread.name = "Firebase Audio Thread #${threadCount.andIncrement}" + return thread + } + + companion object { + val DEFAULT: ThreadFactory = Executors.defaultThreadFactory() + + private fun audioPolicy(): ThreadPolicy { + val builder = ThreadPolicy.Builder().detectNetwork() + + if (BuildConfig.DEBUG) { + builder.penaltyDeath() + } + + return builder.penaltyLog().build() + } } } From 71c5189bde9e8125ee40c851c36ad24906612138 Mon Sep 17 00:00:00 2001 From: VinayGuthal Date: Mon, 20 Oct 2025 13:42:12 -0400 Subject: [PATCH 20/24] update --- firebase-ai/gradle.properties | 2 +- .../src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/firebase-ai/gradle.properties b/firebase-ai/gradle.properties index c5c1aad6f62..794b7a23197 100644 --- a/firebase-ai/gradle.properties +++ b/firebase-ai/gradle.properties @@ -12,5 +12,5 @@ # See the License for the specific language governing permissions and # limitations under the License. -version=99.9.0 +version=17.3.1 latestReleasedVersion=17.3.0 diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt index 6cc4ef2c4b4..bb6ad0b60eb 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt @@ -293,7 +293,6 @@ internal constructor( Json.encodeToString( BidiGenerateContentRealtimeInputSetup(mediaChunks.map { (it.toInternal()) }).toInternal() ) - println("Sending $jsonString") session.send(Frame.Text(jsonString)) } } @@ -400,7 +399,6 @@ internal constructor( if (it.interrupted) { playBackQueue.clear() } else { - println("Queuing audio parts from model") val audioParts = it.content?.parts?.filterIsInstance().orEmpty() for (part in audioParts) { playBackQueue.add(part.inlineData) @@ -438,7 +436,6 @@ internal constructor( } delay(0) } else { - println("Playing audio data") /** * We pause the recording while the model is speaking to avoid interrupting it because of * no echo cancellation From ffc634464d2505a30e91c583d685eadd3ee90c8c Mon Sep 17 00:00:00 2001 From: VinayGuthal Date: Mon, 20 Oct 2025 13:55:03 -0400 Subject: [PATCH 21/24] downgrade version --- gradle/libs.versions.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 7358c633a08..e8636054f4e 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -16,7 +16,7 @@ benchmarkMacro = "1.3.4" browser = "1.3.0" cardview = "1.0.0" constraintlayout = "2.1.4" -coroutines = "1.10.2" +coroutines = "1.9.0" dagger = "2.51" # Don't bump above 2.51 as it causes a bug in AppDistro FeedbackSender JPEG code datastore = "1.1.7" dexmaker = "2.28.1" From 47b3e4178e841064c5a00b962b0856cc8b9ed04e Mon Sep 17 00:00:00 2001 From: VinayGuthal Date: Mon, 20 Oct 2025 14:54:11 -0400 Subject: [PATCH 22/24] update --- .../com/google/firebase/ai/common/util/android.kt | 4 ++++ .../kotlin/com/google/firebase/ai/type/LiveSession.kt | 10 ++++++++++ 2 files changed, 14 insertions(+) diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/common/util/android.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/common/util/android.kt index bb1d28e9746..9f1bbd37260 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/common/util/android.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/common/util/android.kt @@ -38,6 +38,8 @@ internal fun AudioRecord.readAsFlow() = flow { while (true) { if (recordingState != AudioRecord.RECORDSTATE_RECORDING) { + // delay uses a different scheduler in the backend, so it's "stickier" in its enforcement when + // compared to yield. delay(0) continue } @@ -45,6 +47,8 @@ internal fun AudioRecord.readAsFlow() = flow { if (bytesRead > 0) { emit(buffer.copyOf(bytesRead)) } + // delay uses a different scheduler in the backend, so it's "stickier" in its enforcement when + // compared to yield. delay(0) } } diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt index f6f33a7e7ce..37d6f5011cb 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt @@ -155,6 +155,10 @@ internal constructor( * function call. The [FunctionResponsePart] that the callback function returns will be * automatically sent to the model. * + * @param transcriptHandler A callback function that is invoked whenever the model receives a + * transcript. The first [Transcription] object is the input transcription, and the second is the + * output transcription. + * * @param enableInterruptions If enabled, allows the user to speak over or interrupt the model's * ongoing reply. * @@ -250,6 +254,8 @@ internal constructor( ) } ?.let { emit(it.toPublic()) } + // delay uses a different scheduler in the backend, so it's "stickier" in its + // enforcement when compared to yield. delay(0) } } @@ -427,6 +433,8 @@ internal constructor( ?.accumulateUntil(MIN_BUFFER_SIZE) ?.onEach { sendAudioRealtime(InlineData(it, "audio/pcm")) + // delay uses a different scheduler in the backend, so it's "stickier" in its enforcement + // when compared to yield. delay(0) } ?.catch { throw FirebaseAIException.from(it) } @@ -515,6 +523,8 @@ internal constructor( if (!enableInterruptions) { audioHelper?.resumeRecording() } + // delay uses a different scheduler in the backend, so it's "stickier" in its enforcement + // when compared to yield. delay(0) } else { /** From 6ce387863cdec88fed812766f8c22f8bce0a4dd2 Mon Sep 17 00:00:00 2001 From: VinayGuthal Date: Tue, 21 Oct 2025 11:42:44 -0400 Subject: [PATCH 23/24] fix --- firebase-ai/CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/firebase-ai/CHANGELOG.md b/firebase-ai/CHANGELOG.md index 053febea25a..abf0bf55c68 100644 --- a/firebase-ai/CHANGELOG.md +++ b/firebase-ai/CHANGELOG.md @@ -1,5 +1,7 @@ # Unreleased +- [changed] Added better scheduling and louder output for Live API. +- [changed] Added support for input and output transcription. (#7482) - [feature] Added support for sending realtime audio and video in a `LiveSession`. - [changed] Removed redundant internal exception types. (#7475) From 62dd0236a8cde2d6c56991f8292d57f12759f0eb Mon Sep 17 00:00:00 2001 From: VinayGuthal Date: Tue, 21 Oct 2025 13:28:22 -0400 Subject: [PATCH 24/24] add audio callback function --- .../google/firebase/ai/type/AudioHelper.kt | 6 ++- .../google/firebase/ai/type/LiveSession.kt | 41 ++++++++++++++++++- 2 files changed, 44 insertions(+), 3 deletions(-) diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/AudioHelper.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/AudioHelper.kt index 06b4a3efe25..62fbb740e77 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/AudioHelper.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/AudioHelper.kt @@ -159,7 +159,7 @@ internal class AudioHelper( * constructor. */ @RequiresPermission(Manifest.permission.RECORD_AUDIO) - fun build(): AudioHelper { + fun build(audioHandler: ((AudioRecord, AudioTrack) -> Unit)? = null): AudioHelper { val playbackTrack = AudioTrack( AudioAttributes.Builder() @@ -179,7 +179,6 @@ internal class AudioHelper( AudioTrack.MODE_STREAM, AudioManager.AUDIO_SESSION_ID_GENERATE ) - val bufferSize = AudioRecord.getMinBufferSize( 16000, @@ -208,6 +207,9 @@ internal class AudioHelper( if (AcousticEchoCanceler.isAvailable()) { AcousticEchoCanceler.create(recorder.audioSessionId)?.enabled = true } + if (audioHandler != null) { + audioHandler(recorder, playbackTrack) + } return AudioHelper(recorder, playbackTrack) } diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt index 37d6f5011cb..1521d6dbcaf 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt @@ -20,6 +20,7 @@ import android.Manifest.permission.RECORD_AUDIO import android.annotation.SuppressLint import android.content.pm.PackageManager import android.media.AudioFormat +import android.media.AudioRecord import android.media.AudioTrack import android.os.Process import android.os.StrictMode @@ -166,9 +167,47 @@ internal constructor( * consistently available. */ @RequiresPermission(RECORD_AUDIO) + public suspend fun startAudioConversation( + functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)? = null, + transcriptHandler: ((Transcription?, Transcription?) -> Unit)?, + enableInterruptions: Boolean = false, + ) { + startAudioConversation( + functionCallHandler = functionCallHandler, + transcriptHandler = transcriptHandler, + audioHandler = null, + enableInterruptions = enableInterruptions + ) + } + + /** + * Starts an audio conversation with the model, which can only be stopped using + * [stopAudioConversation] or [close]. + * + * @param functionCallHandler A callback function that is invoked whenever the model receives a + * function call. The [FunctionResponsePart] that the callback function returns will be + * automatically sent to the model. + * + * @param transcriptHandler A callback function that is invoked whenever the model receives a + * transcript. The first [Transcription] object is the input transcription, and the second is the + * output transcription. + * + * @param audioHandler A callback function that is invoked immediately following the successful + * initialization of the associated [AudioRecord] and [AudioTrack] objects. This offers a final + * opportunity to apply custom configurations or modifications to these objects, which will remain + * valid and effective for the duration of the current audio session. + * + * @param enableInterruptions If enabled, allows the user to speak over or interrupt the model's + * ongoing reply. + * + * **WARNING**: The user interruption feature relies on device-specific support, and may not be + * consistently available. + */ + @RequiresPermission(RECORD_AUDIO) public suspend fun startAudioConversation( functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)? = null, transcriptHandler: ((Transcription?, Transcription?) -> Unit)? = null, + audioHandler: ((AudioRecord, AudioTrack) -> Unit)? = null, enableInterruptions: Boolean = false, ) { @@ -191,7 +230,7 @@ internal constructor( networkScope = CoroutineScope(blockingDispatcher + childJob() + CoroutineName("LiveSession Network")) audioScope = CoroutineScope(audioDispatcher + childJob() + CoroutineName("LiveSession Audio")) - audioHelper = AudioHelper.build() + audioHelper = AudioHelper.build(audioHandler) recordUserAudio() processModelResponses(functionCallHandler, transcriptHandler)