Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 11 additions & 9 deletions Echo.Tests/UnitTests/EchoOrchestratorTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -57,22 +57,24 @@ public async Task OnRecordingTriggered_StartsAudioRecording()
}

[Fact]
public async Task OnRecordingReleased_ProcessesAudioAndInsertsText()
public async Task ProcessRecordingPayloadAsync_ProcessesAudioAndInsertsText()
{
// Arrange
var fakeStream = new MemoryStream();
var fakeStream = new MemoryStream(new byte[33000]);
string expectedText = "Hello, world!";

_audioMock.Setup(a => a.StopRecording()).Returns(fakeStream);
_whisperMock.Setup(w => w.ProcessAudioAsync(fakeStream)).ReturnsAsync(expectedText);
_audioMock.Setup(a => a.StopRecording())
.Returns(fakeStream);

await _orchestrator.StartAsync(CancellationToken.None);
// We can safely match the exact stream instance now
_whisperMock.Setup(w => w.ProcessAudioAsync(fakeStream))
.ReturnsAsync(expectedText);

// Act
_monitorMock.Raise(m => m.OnRecordingReleased += null, EventArgs.Empty);
_textMock.Setup(t => t.InsertTextAsync(expectedText))
.Returns(Task.CompletedTask);

// Method uses Task.Run, so we have to wait a bit
await Task.Delay(100);
// Act
await _orchestrator.ProcessRecordingPayloadAsync();

// Assert
_audioMock.Verify(a => a.StopRecording(), Times.Once);
Expand Down
1 change: 1 addition & 0 deletions Echo/Constants/LoggerConstants.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ internal class LoggerConstants
public const string Tab = " ";
public const string DoubleTab = $"{Tab}{Tab}";

public const string EarEmoji = "👂";
public const string FolderEmoji = "📂";
public const string LoadingEmoji = "🔄";
public const string HourglassEmoji = "⏳";
Expand Down
41 changes: 23 additions & 18 deletions Echo/Services/BackgroundServices/EchoOrchestrator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ protected override async Task ExecuteAsync(CancellationToken stoppingToken)
try
{
Log.Warning(
"...................Ech🦻 (v.{Version}) is checking dependencies................",
"...................Ech{EarEmoji} (v.{Version}) is checking dependencies................",
LoggerConstants.EarEmoji,
Assembly.GetExecutingAssembly().GetName().Version?.ToString(3));

_assetsProvider.InitializeAssetsDirectory();
Expand All @@ -56,7 +57,9 @@ protected override async Task ExecuteAsync(CancellationToken stoppingToken)

await _pushToTalkMonitorService.StartListeningAsync();

Log.Warning("...................Ech🦻 is ready and running....................");
Log.Warning(
"...................Ech{EarEmoji} is ready and running....................",
LoggerConstants.EarEmoji);
}
catch (Exception exc)
{
Expand All @@ -81,22 +84,7 @@ private void HandlePushToTalkReleased(object? sender, EventArgs e)
_logger.LogInformation("{WhiteSquareButtonEmoji} Push-to-talk released.",
LoggerConstants.WhiteSquareButtonEmoji);

Task.Run(async () =>
{
try
{
MemoryStream? audioStream = _audioRecordingService.StopRecording();

string? textFromAudio = await _whisperInferenceService.ProcessAudioAsync(audioStream);

await _textInsertionService.InsertTextAsync(textFromAudio);
}
catch (Exception exc)
{
_logger.LogError(exc, "{ExplosionEmoji} Error: {ErrorMessage}",
LoggerConstants.ExplosionEmoji, exc.Message);
}
});
_ = Task.Run(ProcessRecordingPayloadAsync);
}

/// <inheritdoc/>
Expand All @@ -107,4 +95,21 @@ public override void Dispose()

base.Dispose();
}

/// <summary>
/// Runs the post-recording pipeline: calls <c>StopRecording</c> on the audio recording service,
/// hands the returned stream to the Whisper inference service, and forwards the resulting text
/// to the text insertion service.
/// </summary>
/// <remarks>
/// Exceptions thrown by any of these calls are caught and logged here rather than propagated
/// to the caller.
/// </remarks>
/// <returns>A task that completes once the pipeline has finished or a failure has been logged.</returns>
internal async Task ProcessRecordingPayloadAsync()
{
    try
    {
        MemoryStream? recordedAudio = _audioRecordingService.StopRecording();
        string? transcript = await _whisperInferenceService.ProcessAudioAsync(recordedAudio);

        await _textInsertionService.InsertTextAsync(transcript);
    }
    catch (Exception failure)
    {
        // Log the failure with the same structured template used elsewhere in this service.
        _logger.LogError(
            failure,
            "{ExplosionEmoji} Error: {ErrorMessage}",
            LoggerConstants.ExplosionEmoji,
            failure.Message);
    }
}
}
24 changes: 12 additions & 12 deletions Readme.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
[![.NET](https://img.shields.io/badge/.NET-8.0-purple.svg)](https://dotnet.microsoft.com/)

# Ech🦻 Voice Assistant
# Ech🦻 Voice Typing Assistant

Echo is a blazing-fast, privacy-first, push-to-talk voice assistant. It runs locally on your machine, leveraging the power of **OpenAI's Whisper** to transcribe your speech into text and automatically insert it wherever your text cursor is active.
Echo is a blazing-fast, privacy-first, push-to-talk voice typing assistant. It runs locally on your machine, leveraging the power of **OpenAI's Whisper** to transcribe your speech into text and automatically insert it wherever your text cursor is active.
**It also features seamless auto-translation into English:** speak in your native language (Russian, Spanish, German, etc.), and Echo will instantly translate it to perfect English.
This makes it an ideal tool for bilingual workflows, coding, and writing documentation! *(Note: Auto-translation works best with English-only models like `ggml-base.en.bin`).*
It features a "Hot Mic" architecture for zero-latency recording and advanced Voice Activity Detection (VAD) to ensure perfect transcriptions without cutting off your first words.
Expand Down Expand Up @@ -47,7 +47,7 @@ Before running the application, ensure you have:

**Option B: Build from Source**

1. Clone the repository: `git clone https://github.com/yourusername/Echo.git`
1. Clone the repository: `git clone https://github.com/GithubPhobos/Echo`
2. Navigate to the folder: `cd Echo`
3. Build the project: `dotnet publish -c Release -r win-x64 --self-contained true`

Expand All @@ -58,9 +58,9 @@ That folder contains `start-recording.wav` and `stop-recording.wav` for audible
2. Place your downloaded Whisper model file (e.g., `ggml-medium.bin`) into the `Assets` folder.
3. Open `appsettings.json` to customize the application (all settings are documented inline). Key settings include:
* `WhisperSettings.ModelName`: **Must match** the exact name of the model you placed in the Assets folder.
* `PushToTalkSettings.Key`: The global hotkey to trigger recording (Default is ```).
* `PushToTalkSettings.Key`: The global hotkey to trigger recording (Default is the backtick key, `` ` ``).
* `Serilog.MinimumLevel.Default`: Available log levels are `Debug`, `Information`, `Warning`, `Error`.

* `WhisperSettings.Prompt`: The initial context provided to the AI. Use this to specify complex domain terminology, define your preferred punctuation style, or provide a baseline vocabulary to help the model transcribe your speech more accurately.

### 3. Hardware Acceleration Setup 🚀

Expand All @@ -71,19 +71,19 @@ For maximum speed, configure the `HardwareBackend` in `appsettings.json` based o
2. Download the required CUDA redistributable libraries from the [NVIDIA Developer Archive](https://developer.download.nvidia.com/compute/cuda/redist/).
You will need files from `cuda_cudart` and `libcublas`.
3. Extract and place the following specific `.dll` files next to `Echo.exe`:
* `cublas64_13.dll`
* `cublasLt64_13.dll`
* `cudart64_13.dll`
* `cublas64_13.dll`
* `cublasLt64_13.dll`
* `cudart64_13.dll`


* **AMD / Intel / Basic NVIDIA (Vulkan)**
* Works with AMD Adrenalin, Intel Arc Graphics, or standard NVIDIA drivers.
* You don't need to install anything, because the required `vulkan-1.dll` is automatically installed by Windows with your GPU drivers.
* Set `"HardwareBackend": "Vulkan"` in `appsettings.json`.
1. Works with AMD Adrenalin, Intel Arc Graphics, or standard NVIDIA drivers.
2. You don't need to install anything, because the required `vulkan-1.dll` is automatically installed by Windows with your GPU drivers.
3. Set `"HardwareBackend": "Vulkan"` in `appsettings.json`.


* **CPU Only**
* Set `"HardwareBackend": "CPU"`. No extra steps required.
1. Set `"HardwareBackend": "CPU"`. No extra steps required.



Expand Down
Loading