Use endpoint IDs instead of device friendly names to store user's pre…

…ferred output device (#17547) Closes #17497 Summary of the issue: NVDA currently stores the friendly name of the user's preferred audio output device. Description of user facing changes None. Description of development approach In `nvdaHelper/local/wasapi.cpp`, rewrote `getPreferredDevice` to fetch the preferred device directly via `MMDeviceEnumerator.GetDevice`. Added manual checks that the fetched device is a render device, and that its status is active, since these conditions were guaranteed to be met since the previous code only iterated over devices which met those prerequisites. Renamed `deviceName` to `endpointId`. In `source/mmwave.py`, added a parameter to `_getOutputDevices` to return a value representing the system default output device. Also made the type hints more self-documenting by using a `NamedTuple`. In `source/gui/settingsDialogs.py`, used `nvwave._getOutputDevices` rather than `nvwave.getOutputDeviceNames` to fetch the available output devices. When saving, used the selection index of `AudioSettingsPanel.deviceList` to index into the tuple of IDs to get the value to save to config. In `source/config/configSpec.py`, moved the `outputDevice` key from `speech` to `audio`, and incremented the schema version to 14. Added an associated profile upgrade function in `profileUpgradeSteps.py`, and tests for same in `tests/unit/test_config.py`. Updated all occurrences of this config key that I am aware of to point to the new location. In `source/synthDrivers/sapi5.py`, rewrote the device selection logic again to work with endpoint IDs. Testing strategy: Built from source and ensured that changing output devices works as expected. Ensured that saving the config worked, and that the output device was selected correctly when restarting NVDA. Tested activating SAPI5 with different output devices selected. Known issues with pull request: SAPI4 still doesn't work (#17516 ), this will be fixed in a future PR. Endpoint ID strings are not human-readable.
nvaccess · Jan 6, 2025 · 0f7e961 · 0f7e961 · ruifontes · Jan 6, 2025
1 parent a7fa0d6
commit 0f7e961
Show file tree

Hide file tree

Showing 13 changed files with 299 additions and 85 deletions.
diff --git a/nvdaHelper/local/wasapi.cpp b/nvdaHelper/local/wasapi.cpp
@@ -1,7 +1,7 @@
 /*
 This file is a part of the NVDA project.
 URL: http://www.nvda-project.org/
-Copyright 2023 James Teh.
+Copyright 2023-2024 NV Access Limited, James Teh.
     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU General Public License version 2.0, as published by
     the Free Software Foundation.
@@ -45,6 +45,7 @@ const IID IID_IMMNotificationClient = __uuidof(IMMNotificationClient);
 const IID IID_IAudioStreamVolume = __uuidof(IAudioStreamVolume);
 const IID IID_IAudioSessionManager2 = __uuidof(IAudioSessionManager2);
 const IID IID_IAudioSessionControl2 = __uuidof(IAudioSessionControl2);
+const IID IID_IMMEndpoint = __uuidof(IMMEndpoint);
 
 /**
  * C++ RAII class to manage the lifecycle of a standard Windows HANDLE closed
@@ -167,9 +168,9 @@ class WasapiPlayer {
 
 	/**
 	 * Constructor.
-	 * Specify an empty (not null) deviceName to use the default device.
+	 * Specify an empty (not null) endpointId to use the default device.
 	 */
-	WasapiPlayer(wchar_t* deviceName, WAVEFORMATEX format,
+	WasapiPlayer(wchar_t* endpointId, WAVEFORMATEX format,
 		ChunkCompletedCallback callback);
 
 	/**
@@ -229,7 +230,7 @@ class WasapiPlayer {
 	CComPtr<IAudioClock> clock;
 	// The maximum number of frames that will fit in the buffer.
 	UINT32 bufferFrames;
-	std::wstring deviceName;
+	std::wstring endpointId;
 	WAVEFORMATEX format;
 	ChunkCompletedCallback callback;
 	PlayState playState = PlayState::stopped;
@@ -246,9 +247,9 @@ class WasapiPlayer {
 	bool isUsingPreferredDevice = false;
 };
 
-WasapiPlayer::WasapiPlayer(wchar_t* deviceName, WAVEFORMATEX format,
+WasapiPlayer::WasapiPlayer(wchar_t* endpointId, WAVEFORMATEX format,
 	ChunkCompletedCallback callback)
-: deviceName(deviceName), format(format), callback(callback) {
+: endpointId(endpointId), format(format), callback(callback) {
 	wakeEvent = CreateEvent(nullptr, false, false, nullptr);
 }
 
@@ -266,7 +267,7 @@ HRESULT WasapiPlayer::open(bool force) {
 	}
 	CComPtr<IMMDevice> device;
 	isUsingPreferredDevice = false;
-	if (deviceName.empty()) {
+	if (endpointId.empty()) {
 		hr = enumerator->GetDefaultAudioEndpoint(eRender, eConsole, &device);
 	} else {
 		hr = getPreferredDevice(device);
@@ -491,48 +492,47 @@ HRESULT WasapiPlayer::getPreferredDevice(CComPtr<IMMDevice>& preferredDevice) {
 	if (FAILED(hr)) {
 		return hr;
 	}
-	CComPtr<IMMDeviceCollection> devices;
-	hr = enumerator->EnumAudioEndpoints(eRender, DEVICE_STATE_ACTIVE, &devices);
+	CComPtr<IMMDevice> device;
+	hr = enumerator->GetDevice(endpointId.c_str(), &device);
 	if (FAILED(hr)) {
 		return hr;
 	}
-	UINT count = 0;
-	devices->GetCount(&count);
-	for (UINT d = 0; d < count; ++d) {
-		CComPtr<IMMDevice> device;
-		hr = devices->Item(d, &device);
-		if (FAILED(hr)) {
-			return hr;
-		}
-		CComPtr<IPropertyStore> props;
-		hr = device->OpenPropertyStore(STGM_READ, &props);
-		if (FAILED(hr)) {
-			return hr;
-		}
-		PROPVARIANT val;
-		hr = props->GetValue(PKEY_Device_FriendlyName, &val);
-		if (FAILED(hr)) {
-			return hr;
-		}
-		// WinMM device names are truncated to MAXPNAMELEN characters, including the
-		// null terminator.
-		constexpr size_t MAX_CHARS = MAXPNAMELEN - 1;
-		if (wcsncmp(val.pwszVal, deviceName.c_str(), MAX_CHARS) == 0) {
-			PropVariantClear(&val);
-			preferredDevice = std::move(device);
-			return S_OK;
-		}
-		PropVariantClear(&val);
+
+	// We only want to use the device if it is plugged in and enabled.
+	DWORD state;
+	hr = device->GetState(&state);
+	if (FAILED(hr)) {
+		return hr;
+	} else if (state != DEVICE_STATE_ACTIVE) {
+		return E_NOTFOUND;
 	}
-	return E_NOTFOUND;
+
+	// We only want to use the device if it is an output device.
+	IMMEndpoint* endpoint;
+	hr = device->QueryInterface(IID_IMMEndpoint, (void**)&endpoint);
+	if (FAILED(hr)) {
+		return hr;
+	}
+	EDataFlow dataFlow;
+	hr = endpoint->GetDataFlow(&dataFlow);
+	if (FAILED(hr)) {
+		return hr;
+	} else if (dataFlow != eRender) {
+		return E_NOTFOUND;
+	}
+	preferredDevice = std::move(device);
+	endpoint->Release();
+	device.Release();
+	enumerator.Release();
+	return S_OK;
 }
 
 bool WasapiPlayer::didPreferredDeviceBecomeAvailable() {
 	if (
 		// We're already using the preferred device.
 		isUsingPreferredDevice ||
 		// A preferred device was not specified.
-		deviceName.empty() ||
+		endpointId.empty() ||
 		// A device hasn't recently changed state.
 		deviceStateChangeCount == notificationClient->getDeviceStateChangeCount()
 	) {
@@ -673,7 +673,7 @@ HRESULT WasapiPlayer::disableCommunicationDucking(IMMDevice* device) {
  */
 class SilencePlayer {
 	public:
-	SilencePlayer(wchar_t* deviceName);
+	SilencePlayer(wchar_t* endpointId);
 	HRESULT init();
 	// Play silence for the specified duration.
 	void playFor(DWORD ms, float volume);
@@ -698,8 +698,8 @@ class SilencePlayer {
 	std::vector<INT16> whiteNoiseData;
 };
 
-SilencePlayer::SilencePlayer(wchar_t* deviceName):
-player(deviceName, getFormat(), nullptr),
+SilencePlayer::SilencePlayer(wchar_t* endpointId):
+player(endpointId, getFormat(), nullptr),
 whiteNoiseData(
 	SILENCE_BYTES  / (
 		sizeof(INT16) / sizeof(unsigned char)
@@ -791,10 +791,10 @@ void SilencePlayer::terminate() {
  * WasapiPlayer or SilencePlayer, with the exception of wasPlay_startup.
  */
 
-WasapiPlayer* wasPlay_create(wchar_t* deviceName, WAVEFORMATEX format,
+WasapiPlayer* wasPlay_create(wchar_t* endpointId, WAVEFORMATEX format,
 	WasapiPlayer::ChunkCompletedCallback callback
 ) {
-	return new WasapiPlayer(deviceName, format, callback);
+	return new WasapiPlayer(endpointId, format, callback);
 }
 
 void wasPlay_destroy(WasapiPlayer* player) {
@@ -855,9 +855,9 @@ HRESULT wasPlay_startup() {
 
 SilencePlayer* silence = nullptr;
 
-HRESULT wasSilence_init(wchar_t* deviceName) {
+HRESULT wasSilence_init(wchar_t* endpointId) {
 	assert(!silence);
-	silence = new SilencePlayer(deviceName);
+	silence = new SilencePlayer(endpointId);
 	return silence->init();
 }
 

diff --git a/source/config/configSpec.py b/source/config/configSpec.py
@@ -13,7 +13,7 @@
 #: provide an upgrade step (@see profileUpgradeSteps.py). An upgrade step does not need to be added when
 #: just adding a new element to (or removing from) the schema, only when old versions of the config
 #: (conforming to old schema versions) will not work correctly with the new schema.
-latestSchemaVersion = 13
+latestSchemaVersion = 14
 
 #: The configuration specification string
 #: @type: String
@@ -41,7 +41,6 @@
 	includeCLDR = boolean(default=True)
 	symbolDictionaries = string_list(default=list("cldr"))
 	beepSpeechModePitch = integer(default=10000,min=50,max=11025)
-	outputDevice = string(default=default)
 	autoLanguageSwitching = boolean(default=true)
 	autoDialectSwitching = boolean(default=false)
 	delayedCharacterDescriptions = boolean(default=false)
@@ -55,6 +54,7 @@
 
 # Audio settings
 [audio]
+	outputDevice = string(default=default)
 	audioDuckingMode = integer(default=0)
 	soundVolumeFollowsVoice = boolean(default=false)
 	soundVolume = integer(default=100, min=0, max=100)

diff --git a/source/config/profileUpgradeSteps.py b/source/config/profileUpgradeSteps.py
@@ -414,3 +414,60 @@ def upgradeConfigFrom_12_to_13(profile: ConfigObj) -> None:
 	log.debug(
 		f"Handled cldr value of {setting!r}. List is now: {profile['speech']['symbolDictionaries']}",
 	)
+
+
+def upgradeConfigFrom_13_to_14(profile: ConfigObj):
+	"""Set [audio][outputDevice] to the endpointID of [speech][outputDevice], and delete the latter."""
+	try:
+		friendlyName = profile["speech"]["outputDevice"]
+	except KeyError:
+		log.debug("Output device not present in config. Taking no action.")
+		return
+	if friendlyName == "default":
+		log.debug("Output device is set to default. Not writing a new value to config.")
+	elif endpointId := _friendlyNameToEndpointId(friendlyName):
+		log.debug(
+			f"Best match for device with {friendlyName=} has {endpointId=}. Writing new value to config.",
+		)
+		if "audio" not in profile:
+			profile["audio"] = {}
+		profile["audio"]["outputDevice"] = endpointId
+	else:
+		log.debug(
+			f"Could not find an audio output device with {friendlyName=}. Not writing a new value to config.",
+		)
+	log.debug("Deleting old config value.")
+	del profile["speech"]["outputDevice"]
+
+
+def _friendlyNameToEndpointId(friendlyName: str) -> str | None:
+	"""Convert a device friendly name to an endpoint ID string.
+
+	Since friendly names are not unique, there may be many devices on one system with the same friendly name.
+	As the order of devices in an IMMEndpointEnumerator is arbitrary, we cannot assume that the first device with a matching friendly name is the device the user wants.
+	We also can't guarantee that the device the user has selected is active, so we need to retrieve devices by state, in order from most to least preferable.
+	It is probably a safe bet that the device the user wants to use is either active or unplugged.
+	Thus, the preference order for states is:
+	1. ACTIVE- The audio adapter that connects to the endpoint device is present and enabled.
+	   In addition, if the endpoint device plugs into a jack on the adapter, then the endpoint device is plugged in.
+	2. UNPLUGGED - The audio adapter that contains the jack for the endpoint device is present and enabled, but the endpoint device is not plugged into the jack.
+	3. DISABLED - The user has disabled the device in the Windows multimedia control panel.
+	4. NOTPRESENT - The audio adapter that connects to the endpoint device has been removed from the system, or the user has disabled the adapter device in Device Manager.
+	Within a state, if there is more than one device with the selected friendly name, we use the first one.
+
+	:param friendlyName: Friendly name of the device to search for.
+	:return: Endpoint ID string of the best match device, or `None` if no device with a matching friendly name is available.
+	"""
+	from nvwave import _getOutputDevices
+	from pycaw.constants import DEVICE_STATE
+
+	states = (DEVICE_STATE.ACTIVE, DEVICE_STATE.UNPLUGGED, DEVICE_STATE.DISABLED, DEVICE_STATE.NOTPRESENT)
+	for state in states:
+		try:
+			return next(
+				device for device in _getOutputDevices(stateMask=state) if device.friendlyName == friendlyName
+			).id
+		except StopIteration:
+			# Proceed to the next device state.
+			continue
+	return None
diff --git a/source/gui/settingsDialogs.py b/source/gui/settingsDialogs.py
@@ -3041,17 +3041,15 @@ def makeSettings(self, settingsSizer: wx.BoxSizer) -> None:
 		# Translators: This is the label for the select output device combo in NVDA audio settings.
 		# Examples of an output device are default soundcard, usb headphones, etc.
 		deviceListLabelText = _("Audio output &device:")
-		# The Windows Core Audio device enumeration does not have the concept of an ID for the default output device, so we have to insert something ourselves instead.
-		# Translators: Value to show when choosing to use the default audio output device.
-		deviceNames = (_("Default output device"), *nvwave.getOutputDeviceNames())
+		self._deviceIds, deviceNames = zip(*nvwave._getOutputDevices(includeDefault=True))
 		self.deviceList = sHelper.addLabeledControl(deviceListLabelText, wx.Choice, choices=deviceNames)
 		self.bindHelpEvent("SelectSynthesizerOutputDevice", self.deviceList)
-		selectedOutputDevice = config.conf["speech"]["outputDevice"]
-		if selectedOutputDevice == "default":
+		selectedOutputDevice = config.conf["audio"]["outputDevice"]
+		if selectedOutputDevice == config.conf.getConfigValidation(("audio", "outputDevice")).default:
 			selection = 0
 		else:
 			try:
-				selection = deviceNames.index(selectedOutputDevice)
+				selection = self._deviceIds.index(selectedOutputDevice)
 			except ValueError:
 				selection = 0
 		self.deviceList.SetSelection(selection)
@@ -3176,13 +3174,10 @@ def _appendSoundSplitModesList(self, settingsSizerHelper: guiHelper.BoxSizerHelp
 		self.soundSplitModesList.Select(0)
 
 	def onSave(self):
-		# We already use "default" as the key in the config spec, so use it here as an alternative to Microsoft Sound Mapper.
-		selectedOutputDevice = (
-			"default" if self.deviceList.GetSelection() == 0 else self.deviceList.GetStringSelection()
-		)
-		if config.conf["speech"]["outputDevice"] != selectedOutputDevice:
+		selectedOutputDevice = self._deviceIds[self.deviceList.GetSelection()]
+		if config.conf["audio"]["outputDevice"] != selectedOutputDevice:
 			# Synthesizer must be reload if output device changes
-			config.conf["speech"]["outputDevice"] = selectedOutputDevice
+			config.conf["audio"]["outputDevice"] = selectedOutputDevice
 			currentSynth = getSynth()
 			if not setSynth(currentSynth.name):
 				_synthWarningDialog(currentSynth.name)

diff --git a/source/nvwave.py b/source/nvwave.py
@@ -1,10 +1,10 @@
 # A part of NonVisual Desktop Access (NVDA)
-# Copyright (C) 2007-2023 NV Access Limited, Aleksey Sadovoy, Cyrille Bougot, Peter Vágner, Babbage B.V.,
+# Copyright (C) 2007-2024 NV Access Limited, Aleksey Sadovoy, Cyrille Bougot, Peter Vágner, Babbage B.V.,
 # Leonard de Ruijter, James Teh
 # This file is covered by the GNU General Public License.
 # See the file COPYING for more details.
 
-"""Provides a simple Python interface to playing audio using the Windows multimedia waveOut functions, as well as other useful utilities."""
+"""Provides a simple Python interface to playing audio using the Windows Audio Session API (WASAPI), as well as other useful utilities."""
 
 from collections.abc import Generator
 import threading
@@ -89,19 +89,40 @@ class AudioPurpose(Enum):
 	SOUNDS = auto()
 
 
-def _getOutputDevices() -> Generator[tuple[str, str]]:
-	"""Generator, yielding device ID and device Name in device ID order.
-	..note: Depending on number of devices being fetched, this may take some time (~3ms)
+class _AudioOutputDevice(typing.NamedTuple):
+	id: str
+	friendlyName: str
+
+
+def _getOutputDevices(
+	*,
+	includeDefault: bool = False,
+	stateMask: DEVICE_STATE = DEVICE_STATE.ACTIVE,
+) -> Generator[_AudioOutputDevice]:
+	"""Generator, yielding device ID and device Name.
+	.. note:: Depending on number of devices being fetched, this may take some time (~3ms)
+
+	:param includeDefault: Whether to include a value representing the system default output device in the generator, defaults to False.
+		.. note:: The ID of this device is **not** a valid mmdevice endpoint ID string, and is for internal use only.
+			The friendly name is **not** generated by the operating system, and it is highly unlikely that it will match any real output device.
+	:param state: What device states to include in the resultant generator, defaults to DEVICE_STATE.ACTIVE.
+	:return: Generator of :class:`_AudioOutputDevices` containing all enabled and present audio output devices on the system.
 	"""
+	if includeDefault:
+		yield _AudioOutputDevice(
+			id=typing.cast(str, config.conf.getConfigValidation(("audio", "outputDevice")).default),
+			# Translators: Value to show when choosing to use the default audio output device.
+			friendlyName=_("Default output device"),
+		)
 	endpointCollection = AudioUtilities.GetDeviceEnumerator().EnumAudioEndpoints(
 		EDataFlow.eRender.value,
-		DEVICE_STATE.ACTIVE.value,
+		stateMask.value,
 	)
 	for i in range(endpointCollection.GetCount()):
 		device = AudioUtilities.CreateDevice(endpointCollection.Item(i))
 		# This should never be None, but just to be sure
 		if device is not None:
-			yield device.id, device.FriendlyName
+			yield _AudioOutputDevice(device.id, device.FriendlyName)
 		else:
 			continue
 
@@ -194,7 +215,7 @@ def play():
 		channels=f.getnchannels(),
 		samplesPerSec=f.getframerate(),
 		bitsPerSample=f.getsampwidth() * 8,
-		outputDevice=config.conf["speech"]["outputDevice"],
+		outputDevice=config.conf["audio"]["outputDevice"],
 		wantDucking=False,
 		purpose=AudioPurpose.SOUNDS,
 	)
@@ -247,7 +268,7 @@ class WasapiWavePlayer(garbageHandler.TrackedObject):
 	#: Whether there is a pending stream idle check.
 	_isIdleCheckPending: bool = False
 	#: Use the default device, this is the configSpec default value.
-	DEFAULT_DEVICE_KEY = typing.cast(str, config.conf.getConfigValidation(("speech", "outputDevice")).default)
+	DEFAULT_DEVICE_KEY = typing.cast(str, config.conf.getConfigValidation(("audio", "outputDevice")).default)
 	#: The silence output device, None if not initialized.
 	_silenceDevice: typing.Optional[str] = None