From f4ebc4e99eeb54f84f979acfaee102920c3116de Mon Sep 17 00:00:00 2001 From: Matt Lorentz Date: Wed, 8 Apr 2026 17:10:20 +0000 Subject: [PATCH] Video in calls (#135) #135 This PR adds basic video functionality to our voice rooms. Again I followed the Discord UX for inspiration, so all video calls start as voice-only calls that gracefully upgrade (and downgrade) when someone turns on a video or starts screen sharing. When a video feed is detected the Room page will change to display a grid of feeds. The grid logic is very basic, that's definitely an area to improve in the future. You can open the chat part of the room with a new button on the VoiceWidget - on the desktop layout this creates a split view with video on the left and chat on the right, but on mobile it switches to chat fullscreen. I also added a little pin icon you can use to focus on a single video feed (useful for screen sharing). There is a lot of tailwind I don't understand here, but it seems to work well enough. I moved voice.ts into a new `call` folder and moved some of its stores into `call/stores.ts` which allowed me to keep most of the video logic in `call/video.ts`. It's not a perfect encapsulation as voice.ts does subscribe to some of the hooks for the livekit calls and passes some of the signals onto `video.ts`. This could probably be broken up better but for this PR I'd rather not focus on making it perfect if that's ok. Partly for the sake of time but also because I envision another PR that renames/reorganizes things and I think a larger UX evaluation is necessary and should include real user feedback. I'm not confident that the Voice Room concept as a whole will stick going forward. Maybe all rooms in a livekit enabled server should be able to host a call (like a slack huddle), maybe users want to be able to schedule calls as events, or even have them start with an ad-hoc set of participants completely outside of a NIP-29 group, etc. 
Co-authored-by: mplorentz Reviewed-on: https://gitea.coracle.social/coracle/flotilla/pulls/135 Co-authored-by: Matt Lorentz Co-committed-by: Matt Lorentz --- android/app/src/main/AndroidManifest.xml | 3 + ios/App/App/Info.plist | 4 +- src/app.css | 12 +- src/app/call/stores.ts | 57 ++++ src/app/call/video.ts | 99 ++++++ src/app/{ => call}/voice.ts | 113 +++--- src/app/components/VideoCallContent.svelte | 278 +++++++++++++++ src/app/components/VideoCallTile.svelte | 31 ++ .../VoiceCallAudioSettingsDialog.svelte | 41 ++- src/app/components/VoiceRoomItem.svelte | 7 +- src/app/components/VoiceRoomJoinDialog.svelte | 2 +- src/app/components/VoiceWidget.svelte | 161 +++++++-- src/app/core/sync.ts | 2 +- src/lib/components/Button.svelte | 1 + src/lib/components/PageContent.svelte | 7 +- src/routes/spaces/[relay]/[h]/+layout.svelte | 11 +- src/routes/spaces/[relay]/[h]/+page.svelte | 323 +++++++++++------- 17 files changed, 943 insertions(+), 209 deletions(-) create mode 100644 src/app/call/stores.ts create mode 100644 src/app/call/video.ts rename src/app/{ => call}/voice.ts (82%) create mode 100644 src/app/components/VideoCallContent.svelte create mode 100644 src/app/components/VideoCallTile.svelte diff --git a/android/app/src/main/AndroidManifest.xml b/android/app/src/main/AndroidManifest.xml index 243166be..bafccd40 100644 --- a/android/app/src/main/AndroidManifest.xml +++ b/android/app/src/main/AndroidManifest.xml @@ -44,4 +44,7 @@ + + + diff --git a/ios/App/App/Info.plist b/ios/App/App/Info.plist index 6d05083f..a4078e58 100644 --- a/ios/App/App/Info.plist +++ b/ios/App/App/Info.plist @@ -24,8 +24,10 @@ LSRequiresIPhoneOS + NSCameraUsageDescription + Flotilla uses the camera when you enable it in a voice room. NSMicrophoneUsageDescription - Flotilla uses the microphone for voice chat in rooms. + Flotilla uses the microphone when you enable it in a voice room. 
UIBackgroundModes remote-notification diff --git a/src/app.css b/src/app.css index c0a85c58..54c39834 100644 --- a/src/app.css +++ b/src/app.css @@ -22,6 +22,16 @@ @apply pl-sai pr-sai; } +/* root */ + +:root { + font-family: Lato; + --sait: var(--safe-area-inset-top, env(safe-area-inset-top)); + --saib: var(--safe-area-inset-bottom, env(safe-area-inset-bottom)); + --sail: var(--safe-area-inset-left, env(safe-area-inset-left)); + --sair: var(--safe-area-inset-right, env(safe-area-inset-right)); +} + @utility py-sai { @apply pt-sai pb-sai; } @@ -415,7 +425,7 @@ body.keyboard-open .hide-on-keyboard { /* chat view */ .chat__compose { - @apply z-compose relative mb-14 grow md:mb-0; + @apply relative z-compose mb-14 shrink-0 md:mb-0; } .chat__compose .chat__compose-inner { diff --git a/src/app/call/stores.ts b/src/app/call/stores.ts new file mode 100644 index 00000000..abe4084e --- /dev/null +++ b/src/app/call/stores.ts @@ -0,0 +1,57 @@ +import {Room as LiveKitRoom} from "livekit-client" +import {derived, writable} from "svelte/store" +import {type Room} from "@app/core/state" + +export type VoiceSession = { + url: string + h: string + room: LiveKitRoom + muted: boolean + cameraOn: boolean + screenShareOn: boolean +} + +export type Pubkey = string + +export type VoiceParticipant = {pubkey?: Pubkey; identity: string} + +export enum VoiceState { + Joining = "joining", + Connected = "connected", + Disconnected = "disconnected", +} + +export const currentVoiceSession = writable(undefined) + +export const voiceState = writable(VoiceState.Disconnected) + +export const currentVoiceRoom = writable(undefined) + +export const participantPubkeyMap = writable>(new Map()) + +export const pubkeyFromLiveKitIdentity = (identity: string): string | undefined => + /^[a-f0-9]{64}$/.test(identity.slice(0, 64)) ? 
identity.slice(0, 64) : undefined + +export const participantFromLiveKitIdentity = (identity: string): VoiceParticipant => { + const pk = pubkeyFromLiveKitIdentity(identity) + return pk ? {pubkey: pk, identity} : {identity} +} + +export const participantKey = (p: VoiceParticipant) => p.pubkey ?? p.identity + +export const speakingParticipants = writable([]) + +export const isParticipantSpeaking = derived( + speakingParticipants, + $participants => (p: VoiceParticipant) => + $participants.some(sp => participantKey(sp) === participantKey(p)), +) + +export const isLocalSpeaking = derived( + [currentVoiceSession, speakingParticipants], + ([$session, $speaking]) => { + if (!$session?.room) return false + const local = participantFromLiveKitIdentity($session.room.localParticipant.identity) + return $speaking.some(sp => participantKey(sp) === participantKey(local)) + }, +) diff --git a/src/app/call/video.ts b/src/app/call/video.ts new file mode 100644 index 00000000..eccd4397 --- /dev/null +++ b/src/app/call/video.ts @@ -0,0 +1,99 @@ +import {Track} from "livekit-client" +import {MediaQuery} from "svelte/reactivity" +import {derived, get, writable} from "svelte/store" +import {currentVoiceSession, VoiceState, type VoiceSession, voiceState} from "@app/call/stores" +import {pushToast} from "@app/util/toast" + +export enum VideoCallLayout { + Chat = "chat", + Video = "video", + Split = "split", +} + +export const isDesktopLayout = new MediaQuery("min-width: 768px", false) + +export enum ViewportSize { + Desktop = "desktop", + Mobile = "mobile", +} + +export const videoCallViewportSync = { + previousLayout: undefined as ViewportSize | undefined, +} + +export const videoCallLayout = writable(VideoCallLayout.Split) + +export const resetVideoCallLayout = () => { + videoCallViewportSync.previousLayout = undefined + videoCallLayout.set(VideoCallLayout.Chat) +} + +export const videoPrimaryTileKey = writable(undefined) + +export const toggleVideoPrimaryTile = (key: string) => { + 
videoPrimaryTileKey.update(k => (k === key ? undefined : key)) +} + +const VISUAL_SOURCES = [Track.Source.Camera, Track.Source.ScreenShare] as const + +const countLiveVisualFeeds = (session: VoiceSession): number => { + const room = session.room + let n = 0 + const lp = room.localParticipant + if (session.cameraOn) { + const pub = lp.getTrackPublication(Track.Source.Camera) + if (pub?.track) n += 1 + } + if (session.screenShareOn) { + const pub = lp.getTrackPublication(Track.Source.ScreenShare) + if (pub?.track) n += 1 + } + for (const rp of room.remoteParticipants.values()) { + for (const source of VISUAL_SOURCES) { + const pub = rp.getTrackPublication(source) + if (pub?.isSubscribed && pub.track) n += 1 + } + } + return n +} + +export const triggerVideoFeedCount = () => { + currentVoiceSession.update(s => (s ? {...s} : s)) +} + +export const videoTileCount = derived([currentVoiceSession, voiceState], ([$session, $state]) => { + if ($state !== VoiceState.Connected || !$session) return 0 + return countLiveVisualFeeds($session) +}) + +export const toggleCamera = async () => { + const session = get(currentVoiceSession) + if (!session) return + + const cameraOn = !session.cameraOn + try { + await session.room.localParticipant.setCameraEnabled(cameraOn) + currentVoiceSession.set({...session, cameraOn}) + } catch { + pushToast({ + theme: "error", + message: cameraOn ? "Could not access camera" : "Could not turn off camera", + }) + } +} + +export const toggleScreenShare = async () => { + const session = get(currentVoiceSession) + if (!session) return + + const screenShareOn = !session.screenShareOn + try { + await session.room.localParticipant.setScreenShareEnabled(screenShareOn) + currentVoiceSession.set({...session, screenShareOn}) + } catch { + pushToast({ + theme: "error", + message: screenShareOn ? 
"Could not start screen sharing" : "Could not stop screen sharing", + }) + } +} diff --git a/src/app/voice.ts b/src/app/call/voice.ts similarity index 82% rename from src/app/voice.ts rename to src/app/call/voice.ts index 7fb1f241..95cab6a5 100644 --- a/src/app/voice.ts +++ b/src/app/call/voice.ts @@ -4,21 +4,35 @@ */ import { DisconnectReason, + LocalParticipant, + LocalTrackPublication, Room as LiveKitRoom, RoomEvent, Track, supportsAudioOutputSelection, type AudioCaptureOptions, - type LocalParticipant, } from "livekit-client" -import {derived, get, writable} from "svelte/store" +import {derived, get} from "svelte/store" import {map, removeUndefined, uniqBy} from "@welshman/lib" import type {TrustedEvent} from "@welshman/util" import {makeHttpAuth, makeHttpAuthHeader, getTags} from "@welshman/util" import {signer} from "@welshman/app" import {getLivekitEndpoint} from "$lib/livekit" import {AbortError, whenAborted, whenTimeout} from "$lib/util" -import {deriveLatestEventForUrl, deriveRoom, makeRoomId, type Room} from "@app/core/state" +import { + currentVoiceRoom, + currentVoiceSession, + participantFromLiveKitIdentity, + participantKey, + participantPubkeyMap, + pubkeyFromLiveKitIdentity, + speakingParticipants, + VoiceState, + type VoiceParticipant, + voiceState, +} from "@app/call/stores" +import {resetVideoCallLayout, triggerVideoFeedCount, videoPrimaryTileKey} from "@app/call/video" +import {deriveLatestEventForUrl, deriveRoom, makeRoomId} from "@app/core/state" import {pushToast} from "@app/util/toast" export const LIVEKIT_PARTICIPANTS = 39004 @@ -27,30 +41,12 @@ export {checkRelayHasLivekit} from "$lib/livekit" export {supportsAudioOutputSelection} -export type VoiceSession = { - url: string - h: string - room: LiveKitRoom - muted: boolean -} - -export type Pubkey = string - -export type VoiceParticipant = {pubkey?: Pubkey; identity: string} - -export enum VoiceState { - Joining = "joining", - Connected = "connected", - Disconnected = "disconnected", -} - 
-export const currentVoiceSession = writable(undefined) - const LIVEKIT_DEFAULT_DEVICE_ID = "default" export enum DeviceKind { AudioInput = "audioinput", AudioOutput = "audiooutput", + VideoInput = "videoinput", } export const switchVoiceActiveDevice = async ( @@ -71,17 +67,14 @@ export const switchVoiceActiveDevice = async ( case DeviceKind.AudioOutput: label = "speaker" break + case DeviceKind.VideoInput: + label = "camera" + break } pushToast({theme: "error", message: `Error changing ${label}`}) } } -export const voiceState = writable(VoiceState.Disconnected) - -export const currentVoiceRoom = writable(undefined) - -export const participantPubkeyMap = writable>(new Map()) - const addParticipant = (identity: string) => { participantPubkeyMap.update(m => { const next = new Map(m) @@ -98,24 +91,6 @@ const deleteParticipant = (identity: string) => { }) } -export const pubkeyFromLiveKitIdentity = (identity: string): string | undefined => - /^[a-f0-9]{64}$/.test(identity.slice(0, 64)) ? identity.slice(0, 64) : undefined - -export const participantFromLiveKitIdentity = (identity: string): VoiceParticipant => { - const pk = pubkeyFromLiveKitIdentity(identity) - return pk ? {pubkey: pk, identity} : {identity} -} - -export const participantKey = (p: VoiceParticipant) => p.pubkey ?? 
p.identity - -export const speakingParticipants = writable([]) - -export const isParticipantSpeaking = derived( - speakingParticipants, - $participants => (p: VoiceParticipant) => - $participants.some(sp => participantKey(sp) === participantKey(p)), -) - const fetchLivekitToken = async ( url: string, groupId: string, @@ -197,7 +172,9 @@ const setUpMicrophone = async ( } const onRoomDisconnected = (reason?: DisconnectReason) => { + videoPrimaryTileKey.set(undefined) currentVoiceSession.set(undefined) + resetVideoCallLayout() if (reason !== undefined && reason !== DisconnectReason.CLIENT_INITIATED) { voiceState.set(VoiceState.Disconnected) const message = @@ -216,11 +193,16 @@ const onTrackSubscribed = (track: Track) => { element.style.display = "none" document.body.appendChild(element) element.play().catch(() => {}) + } else if (track.kind === Track.Kind.Video) { + triggerVideoFeedCount() } } const onTrackUnsubscribed = (track: Track) => { track.detach().forEach(el => el.remove()) + if (track.kind === Track.Kind.Video) { + triggerVideoFeedCount() + } } const onActiveSpeakersChanged = (participants: {identity: string}[]) => { @@ -241,6 +223,17 @@ const onParticipantDisconnected = (participant: {identity: string}) => { deleteParticipant(participant.identity) } +const onLocalTrackUnpublished = ( + publication: LocalTrackPublication, + participant: LocalParticipant, +) => { + if (publication.source !== Track.Source.ScreenShare) return + const session = get(currentVoiceSession) + if (!session || participant.identity !== session.room.localParticipant.identity) return + if (!session.screenShareOn) return + currentVoiceSession.set({...session, screenShareOn: false}) +} + let joinAbortController: AbortController | undefined export const cancelJoinVoiceRoom = () => { @@ -278,6 +271,7 @@ export const joinVoiceRoom = async ( liveKitRoom.on(RoomEvent.ParticipantDisconnected, onParticipantDisconnected) liveKitRoom.on(RoomEvent.TrackSubscribed, onTrackSubscribed) 
liveKitRoom.on(RoomEvent.TrackUnsubscribed, onTrackUnsubscribed) + liveKitRoom.on(RoomEvent.LocalTrackUnpublished, onLocalTrackUnpublished) liveKitRoom.on(RoomEvent.ActiveSpeakersChanged, onActiveSpeakersChanged) try { @@ -301,7 +295,14 @@ export const joinVoiceRoom = async ( const muted = await setUpMicrophone(startMuted, preferredMicId, liveKitRoom.localParticipant) - currentVoiceSession.set({url, h, room: liveKitRoom, muted}) + currentVoiceSession.set({ + url, + h, + room: liveKitRoom, + muted, + cameraOn: false, + screenShareOn: false, + }) voiceState.set(VoiceState.Connected) playJoinSound() } catch (e) { @@ -320,8 +321,26 @@ export const leaveVoiceRoom = async () => { const audio = new Audio("/leave-voice-room.mp3") audio.play().catch(() => {}) + if (session.cameraOn) { + try { + await session.room.localParticipant.setCameraEnabled(false) + } catch { + pushToast({theme: "error", message: "Error turning off camera."}) + } + } + + if (session.screenShareOn) { + try { + await session.room.localParticipant.setScreenShareEnabled(false) + } catch { + pushToast({theme: "error", message: "Error turning off screen sharing."}) + } + } + voiceState.set(VoiceState.Disconnected) + videoPrimaryTileKey.set(undefined) currentVoiceSession.set(undefined) + resetVideoCallLayout() session.room.disconnect() speakingParticipants.set([]) participantPubkeyMap.set(new Map()) diff --git a/src/app/components/VideoCallContent.svelte b/src/app/components/VideoCallContent.svelte new file mode 100644 index 00000000..ce65a7ed --- /dev/null +++ b/src/app/components/VideoCallContent.svelte @@ -0,0 +1,278 @@ + + +{#snippet videoTile(tile: VideoTileData, layout: TileLayout)} +
+ {#if tile.track} + + {:else} +
+ +
+ {/if} + + {labelFor(tile.identity, tile.source)}{tile.isLocal ? " (you)" : ""} + + {#if videoTiles.length > 1} + {@const pinned = $videoPrimaryTileKey === tileKey(tile)} + + {/if} +
+{/snippet} + +{#snippet videoPanelBody()} + {#if showTileGrid} + {#if useSpotlightLayout && primaryTile} +
+ {@render videoTile(primaryTile, "spotlight")} + {#if secondaryTiles.length > 0} +
+ {#each secondaryTiles as tile (tileKey(tile))} + {@render videoTile(tile, "strip")} + {/each} +
+ {/if} +
+ {:else if useMultiGrid} +
+ {#each videoTiles as tile (tileKey(tile))} + {@render videoTile(tile, "default")} + {/each} +
+ {:else} +
+ {#each videoTiles as tile (tileKey(tile))} + {@render videoTile(tile, "default")} + {/each} +
+ {/if} + {:else} +
+

No camera or screen share yet.

+

Use the camera or screen share control to share video.

+
+ {/if} +{/snippet} + +{#if showVideoContent} +
+ {#if mobile} +
+
+ {@render videoPanelBody()} +
+
+ +
+
+ {:else} + {@render videoPanelBody()} + {/if} +
+{/if} diff --git a/src/app/components/VideoCallTile.svelte b/src/app/components/VideoCallTile.svelte new file mode 100644 index 00000000..6f8db1f9 --- /dev/null +++ b/src/app/components/VideoCallTile.svelte @@ -0,0 +1,31 @@ + + + diff --git a/src/app/components/VoiceCallAudioSettingsDialog.svelte b/src/app/components/VoiceCallAudioSettingsDialog.svelte index c8afccc0..a5fab918 100644 --- a/src/app/components/VoiceCallAudioSettingsDialog.svelte +++ b/src/app/components/VoiceCallAudioSettingsDialog.svelte @@ -7,13 +7,8 @@ import ModalHeader from "@lib/components/ModalHeader.svelte" import ModalSubtitle from "@lib/components/ModalSubtitle.svelte" import ModalTitle from "@lib/components/ModalTitle.svelte" - import { - currentVoiceSession, - DeviceKind, - supportsAudioOutputSelection, - switchVoiceActiveDevice, - type VoiceSession, - } from "@app/voice" + import {currentVoiceSession, type VoiceSession} from "@app/call/stores" + import {DeviceKind, supportsAudioOutputSelection, switchVoiceActiveDevice} from "@app/call/voice" import {popModal} from "@app/util/modal" const selectValueForActiveDevice = (session: VoiceSession, kind: DeviceKind): string => { @@ -26,8 +21,10 @@ let audioInputs = $state([]) let audioOutputs = $state([]) + let videoInputs = $state([]) let selectedInput = $state("") let selectedOutput = $state("") + let selectedVideo = $state("") const loadDevices = async () => { if (!navigator.mediaDevices?.enumerateDevices) return @@ -35,9 +32,11 @@ const devices = await navigator.mediaDevices.enumerateDevices() audioInputs = devices.filter(d => d.kind === "audioinput") audioOutputs = devices.filter(d => d.kind === "audiooutput") + videoInputs = devices.filter(d => d.kind === "videoinput") } catch { audioInputs = [] audioOutputs = [] + videoInputs = [] } } @@ -55,6 +54,7 @@ } selectedInput = selectValueForActiveDevice(session, DeviceKind.AudioInput) selectedOutput = selectValueForActiveDevice(session, DeviceKind.AudioOutput) + selectedVideo = 
selectValueForActiveDevice(session, DeviceKind.VideoInput) }) const onInputChange = () => { @@ -65,6 +65,10 @@ void switchVoiceActiveDevice(DeviceKind.AudioOutput, selectedOutput) } + const onVideoChange = () => { + void switchVoiceActiveDevice(DeviceKind.VideoInput, selectedVideo) + } + const onDone = () => { popModal() } @@ -76,8 +80,8 @@ - Audio settings - Choose microphone and speaker for this call. + Call settings + Microphone, speaker, and camera for this call.
@@ -120,6 +124,25 @@ {/snippet} {/if} + + {#snippet label()} +

Camera

+ {/snippet} + {#snippet input()} + + {/snippet} +
diff --git a/src/app/components/VoiceRoomItem.svelte b/src/app/components/VoiceRoomItem.svelte index 26b7f709..965d1ba4 100644 --- a/src/app/components/VoiceRoomItem.svelte +++ b/src/app/components/VoiceRoomItem.svelte @@ -12,14 +12,13 @@ import {makeRoomId} from "@app/core/state" import { VoiceState, - deriveVoiceParticipants, - cancelJoinVoiceRoom, currentVoiceRoom, - voiceState, isParticipantSpeaking, participantKey, + voiceState, type VoiceParticipant, - } from "@app/voice" + } from "@app/call/stores" + import {cancelJoinVoiceRoom, deriveVoiceParticipants} from "@app/call/voice" interface Props { url: string diff --git a/src/app/components/VoiceRoomJoinDialog.svelte b/src/app/components/VoiceRoomJoinDialog.svelte index 8bba88b5..7da58f63 100644 --- a/src/app/components/VoiceRoomJoinDialog.svelte +++ b/src/app/components/VoiceRoomJoinDialog.svelte @@ -14,7 +14,7 @@ import ModalTitle from "@lib/components/ModalTitle.svelte" import {AbortError, TimeoutError} from "$lib/util" import {displayRoom} from "@app/core/state" - import {joinVoiceRoom} from "@app/voice" + import {joinVoiceRoom} from "@app/call/voice" import {popModal} from "@app/util/modal" import {pushToast} from "@app/util/toast" diff --git a/src/app/components/VoiceWidget.svelte b/src/app/components/VoiceWidget.svelte index 1ed8356e..c8601644 100644 --- a/src/app/components/VoiceWidget.svelte +++ b/src/app/components/VoiceWidget.svelte @@ -1,15 +1,20 @@ {#if targetRoom} @@ -76,19 +132,47 @@ in:fly={{y: 60, duration: 350}} out:fly={{y: 60, duration: 250}} class="flex flex-col gap-2 rounded-box bg-base-100 p-3"> -
- {#if $voiceState === VoiceState.Joining} - Joining... - {:else if $voiceState === VoiceState.Connected} - Voice Connected - {:else} - Disconnected +
+ + {#if showChatButton} + {/if} - - {roomName} / {spaceName} -
-
+
{#if $voiceState === VoiceState.Joining} + {#if !Capacitor.isNativePlatform()} + + {/if} + - {:else} - - {/if} -
-
- {:else} - {#if loadingForward} -

- Looking for messages... -

- {/if} - {#each elements as { type, id, value, showPubkey, addSpaceBelow } (id)} - {#if type === "new-messages"} -
-
-

New Messages

-
-
- {:else if type === "date"} - {value} - {:else} - {@const event = $state.snapshot(value as TrustedEvent)} - {#if event.kind === ROOM_ADD_MEMBER} - - {:else} -
- -
- {/if} - {/if} - {/each} -

- {#if loadingBackward} - Looking for messages... - {:else} - End of message history - {/if} -

+
+ {#if voiceConnectedHere} +