diff --git a/Runtime/Scripts/AudioStream.cs b/Runtime/Scripts/AudioStream.cs index f520b111..7915f466 100644 --- a/Runtime/Scripts/AudioStream.cs +++ b/Runtime/Scripts/AudioStream.cs @@ -18,6 +18,7 @@ public sealed class AudioStream : IDisposable private readonly AudioProbe _probe; private RingBuffer _buffer; private short[] _tempBuffer; + private short[] _crossfadeScratch; private uint _numChannels; private uint _sampleRate; private AudioResampler _resampler = new AudioResampler(); @@ -29,9 +30,15 @@ public sealed class AudioStream : IDisposable private const float BufferSizeSeconds = 0.2f; // 200ms ring buffer for all platforms private const float PrimingThresholdSeconds = 0.03f; // Wait for 30ms of data before playing - // Drift correction: skip samples when buffer fills up due to clock drift - private const float HighWaterMarkPercent = 0.50f; // Target 50% fill level after correction - private const float SkipPerCallbackPercent = 0.05f; // Skip 5% of callback samples per call + // Drift correction: skip samples when the buffer fills up due to clock drift. + // HWM at 50% (100ms of 200ms) so normal network jitter does not trip catch-up. + // Cooldown prevents back-to-back skips, which sound like a gravelly click train; + // one occasional skip is inaudible thanks to the crossfade in OnAudioRead. + private const float HighWaterMarkPercent = 0.50f; + private const float SkipPerCallbackPercent = 0.05f; + private const int SkipCooldownCallbacks = 10; + private const int CrossfadeFrames = 128; // ~2.7ms @ 48kHz + private int _skipCooldown = 0; /// /// Creates a new audio stream from a remote audio track, attaching it to the @@ -87,11 +94,13 @@ private void OnAudioRead(float[] data, int channels, int sampleRate) _buffer?.Dispose(); _buffer = new RingBuffer(bufferSize * sizeof(short)); _tempBuffer = new short[data.Length]; + _crossfadeScratch = new short[CrossfadeFrames * channels]; _numChannels = (uint)channels; _sampleRate = (uint)sampleRate; // Buffer was recreated, need to re-prime _isPrimed = false; + _skipCooldown = 0; } static float S16ToFloat(short v) @@ -127,7 +136,7 @@ static float S16ToFloat(short v) if (valuesAvailableToRead < data.Length) { _isPrimed = false; - Utils.Debug($"AudioStream underrun detected, re-priming (got {valuesAvailableToRead} samples)"); + Utils.Debug($"AudioStream underrun detected, re-priming (got {valuesAvailableToRead} samples but want to read {data.Length})"); // Output silence immediately instead of playing partial/choppy samples. // On next frames, the !_isPrimed check above will ensure we wait for 30ms @@ -147,21 +156,50 @@ static float S16ToFloat(short v) // If the buffer is empty or doesn't have enough data, bytesRead will be less than requested. int samplesRead = bytesRead / sizeof(short); - // Drift correction: if buffer is filling up (producer faster than consumer), - // skip a small number of samples to prevent overflow and keep latency bounded. + // Drift correction: if the buffer is filling up (producer faster than + // consumer), discard a small run of samples and crossfade the output tail + // into the post-skip window so the seam is inaudible. Cooldown spaces + // skips out so we never produce back-to-back artifacts. + if (_skipCooldown > 0) + _skipCooldown--; + int highWaterBytes = (int)(_buffer.Capacity * HighWaterMarkPercent); - int remainingBytes = _buffer.AvailableRead(); - if (remainingBytes > highWaterBytes) + if (_skipCooldown == 0 && _buffer.AvailableRead() > highWaterBytes) { - int skipBytes = (int)(data.Length * sizeof(short) * SkipPerCallbackPercent); int frameSize = channels * sizeof(short); + int skipBytes = (int)(data.Length * sizeof(short) * SkipPerCallbackPercent); skipBytes -= skipBytes % frameSize; // align to frame boundary - if (skipBytes > 0) + int crossfadeShorts = CrossfadeFrames * channels; + int crossfadeBytes = crossfadeShorts * sizeof(short); + + // Only skip if we still have enough data left to fill the crossfade + // window; never trade a catch-up artifact for an underrun artifact. + if (skipBytes > 0 && _buffer.AvailableRead() >= skipBytes + crossfadeBytes) { _buffer.SkipRead(skipBytes); + + var postBytes = MemoryMarshal.Cast(_crossfadeScratch.AsSpan().Slice(0, crossfadeShorts)); + _buffer.Read(postBytes); + + // Linearly crossfade the last crossfadeFrames frames of _tempBuffer + // with the post-skip samples: the step discontinuity becomes a + // short linear ramp that is continuous with the next callback. + int tailStart = samplesRead - crossfadeShorts; + if (tailStart >= 0) + { + for (int i = 0; i < crossfadeShorts; i++) + { + int frameIdx = i / channels; + float t = (frameIdx + 1) / (float)CrossfadeFrames; + short pre = _tempBuffer[tailStart + i]; + short post = _crossfadeScratch[i]; + _tempBuffer[tailStart + i] = (short)(pre * (1f - t) + post * t); + } + _skipCooldown = SkipCooldownCallbacks; + } } - } + } // Clear the entire output buffer to silence, then fill with the samples // we successfully read from the ring buffer. @@ -174,7 +212,7 @@ static float S16ToFloat(short v) } // Called when application goes to background or returns to foreground - private void OnApplicationPause(bool pause) + internal void OnApplicationPause(bool pause) { if (_disposed) return; @@ -266,6 +304,7 @@ private void Dispose(bool disposing) _buffer?.Dispose(); _buffer = null; _tempBuffer = null; + _crossfadeScratch = null; _resampler?.Dispose(); _resampler = null; Handle.Dispose(); @@ -278,5 +317,17 @@ private void Dispose(bool disposing) { Dispose(false); } + + // For testing and debugging + internal float GetBufferFill() + { + lock(_lock) + { + if (_buffer == null) + return 0; + return _buffer.AvailableReadInPercent(); + } + + } } } diff --git a/Tests/PlayMode/AudioBufferTests.cs b/Tests/PlayMode/AudioBufferTests.cs new file mode 100644 index 00000000..5678e903 --- /dev/null +++ b/Tests/PlayMode/AudioBufferTests.cs @@ -0,0 +1,164 @@ +using System.Collections; +using UnityEngine.TestTools; +using LiveKit.PlayModeTests.Utils; +using LiveKit.Proto; +using NUnit.Framework; +using UnityEngine; + +namespace LiveKit.PlayModeTests +{ + class AudioBufferTests + { + const string AudioTrackName = "test-audio-track"; + static TrackPublishOptions AudioOptions() => + new TrackPublishOptions { Source = TrackSource.SourceMicrophone }; + + + AudioStream _audioStream; + AudioSource _audioSource; + + TestRoomContext _context; + + private IEnumerator SetUp() + { + var sineWaveAudioSource = new SineWaveAudioSource(); + + var sender = TestRoomContext.ConnectionOptions.Default; + sender.Identity = "sender"; + var receiver = TestRoomContext.ConnectionOptions.Default; + receiver.Identity = "receiver"; + + _context = new TestRoomContext(new[] { sender, receiver }); + var senderRoom = _context.Rooms[0]; + var subscriberRoom = _context.Rooms[1]; + + var audioTrack = LocalAudioTrack.CreateAudioTrack(AudioTrackName, sineWaveAudioSource, senderRoom); + + yield return _context.ConnectAll(); + Assert.IsNull(_context.ConnectionError, _context.ConnectionError); + + var publishAudioTrack = senderRoom.LocalParticipant.PublishTrack(audioTrack, AudioOptions()); + + var subscribedExp = new Expectation(timeoutSeconds: 10); + RemoteAudioTrack subscribedTrack = null; + subscriberRoom.TrackSubscribed += (remoteTrack, publication, participant) => + { + if (remoteTrack is RemoteAudioTrack remoteAudioTrack) + { + subscribedTrack = remoteAudioTrack; + subscribedExp.Fulfill(); + } + }; + + yield return publishAudioTrack; + Assert.IsFalse(publishAudioTrack.IsError); + + sineWaveAudioSource.Start(); + yield return subscribedExp.Wait(); + Assert.IsNull(subscribedExp.Error); + + // We need an AudioListener in the scene in order for an AudioThread to read from sources + var listenerGO = new GameObject("LatencyTestAudioListener"); + var audioListener = listenerGO.AddComponent(); + + // This GO will be hooked up with the AudioStream + var receiverGO = new GameObject("LatencyTestReceiver"); + _audioSource = receiverGO.AddComponent(); + + // AudioStream constructor adds AudioProbe and starts playback + _audioStream = new AudioStream(subscribedTrack, _audioSource); + + // Init buffer with one audio frame read + var readOneAudioFrame = new Expectation( () => { return _audioStream.GetBufferFill() != 0f; } ); + yield return readOneAudioFrame.Wait(); + } + + [UnityTest, Category("E2E")] + public IEnumerator AudioBuffer_WhenBufferRunsFull_TriggersCatchupMechanism() + { + yield return SetUp(); + + // Backgrounded + MockApplicationBackgrounded(); + + // Buffer is full at this point + var BufferFillsInBackgroundExpectation = new Expectation(() => { return _audioStream.GetBufferFill() == 1f; }); + yield return BufferFillsInBackgroundExpectation.Wait(); + Assert.IsNull(BufferFillsInBackgroundExpectation.Error); + + Debug.Log("Buffer is filled"); + + // Resume consumption without foregrounding event to prevent full buffer clear + _audioSource.UnPause(); + + // Buffer fill is quickly reduced due to catchup logic + var BufferFillReducedThroughCatchup = new Expectation(() => { return _audioStream.GetBufferFill() < 0.6f; }); + yield return BufferFillReducedThroughCatchup.Wait(); + Assert.IsNull(BufferFillReducedThroughCatchup.Error); + + _context.Dispose(); + } + + [UnityTest, Category("E2E")] + public IEnumerator AudioBuffer_FillLevelStaysStable() + { + yield return SetUp(); + + // Fill buffer above prime within short time + var bufferFillsAbovePrime = new Expectation(() => { return _audioStream.GetBufferFill() > 0.15f; }, 1f); + yield return bufferFillsAbovePrime.Wait(); + Assert.IsNull(bufferFillsAbovePrime.Error); + + // Buffer stays at stable levels over long time + var elapsedTime = 0f; + while (elapsedTime < 3f) + { + elapsedTime += Time.deltaTime; + Assert.That(_audioStream.GetBufferFill(), Is.LessThan(0.8f)); + yield return null; + } + + _context.Dispose(); + } + + [UnityTest, Category("E2E")] + public IEnumerator AudioBuffer_WhenAppForegrounded_BufferIsCleared() + { + yield return SetUp(); + + // Backgrounded + MockApplicationBackgrounded(); + + // Buffer is full at this point + var BufferFillsInBackgroundExpectation = new Expectation(() => { return _audioStream.GetBufferFill() == 1f; }); + yield return BufferFillsInBackgroundExpectation.Wait(); + Assert.IsNull(BufferFillsInBackgroundExpectation.Error); + + // Foregrounded + MockApplicationForegrounded(); + + // Buffer is cleared + var fillAfterForegrounded = _audioStream.GetBufferFill(); + Assert.That(fillAfterForegrounded, Is.EqualTo(0f)); + + // Buffer refills above primed threshold of 30ms, I give it 100ms for that + var BufferRefillsUntilPrimed = new Expectation(() => { return _audioStream.GetBufferFill() >= 0.15f; }, 0.1f); + yield return BufferFillsInBackgroundExpectation.Wait(); + Assert.IsNull(BufferFillsInBackgroundExpectation.Error); + + _context.Dispose(); + } + + private void MockApplicationBackgrounded() + { + _audioSource.Pause(); + _audioStream.OnApplicationPause(true); + } + + private void MockApplicationForegrounded() + { + _audioSource.UnPause(); + _audioStream.OnApplicationPause(false); + } + } +} \ No newline at end of file diff --git a/Tests/PlayMode/AudioBufferTests.cs.meta b/Tests/PlayMode/AudioBufferTests.cs.meta new file mode 100644 index 00000000..75a4c492 --- /dev/null +++ b/Tests/PlayMode/AudioBufferTests.cs.meta @@ -0,0 +1,11 @@ +fileFormatVersion: 2 +guid: d1f846be95f7e41f3955c68d98651a0d +MonoImporter: + externalObjects: {} + serializedVersion: 2 + defaultReferences: [] + executionOrder: 0 + icon: {instanceID: 0} + userData: + assetBundleName: + assetBundleVariant: