From c5b34c2b24e83a7df4f71419fa50397b661250d4 Mon Sep 17 00:00:00 2001
From: Justin Williams <jblwilliams@gmail.com>
Date: Wed, 29 Oct 2025 13:52:30 -0400
Subject: [PATCH 1/2] Modify ServerEvents to match new responses from Realtime
 API. Remove model from connect API and rely on configuration changes or
 initial client_secrets API call to configure. Add conversationItemAdded server
 event and use previousItemId to place entries in correct position.

---
 Sources/Core/Models/ServerEvent.swift         | 10 +++++--
 Sources/Core/Models/Session.swift             | 15 +++++-----
 Sources/UI/Conversation.swift                 | 28 +++++++++++++++++--
 .../Extensions/RealtimeAPI+WebRTC.swift       |  4 +--
 .../WebRTC/Extensions/URLRequest+WebRTC.swift |  6 ++--
 Sources/WebRTC/WebRTCConnector.swift          |  1 +
 6 files changed, 46 insertions(+), 18 deletions(-)

diff --git a/Sources/Core/Models/ServerEvent.swift b/Sources/Core/Models/ServerEvent.swift
index c8ff0af..949c449 100644
--- a/Sources/Core/Models/ServerEvent.swift
+++ b/Sources/Core/Models/ServerEvent.swift
@@ -22,6 +22,11 @@ import MetaCodable
 		public var token: String
 	}
 
+    public struct Usage: Equatable, Hashable, Codable, Sendable { // Usage payload for the event case that takes `usage: Usage`.
+        public let type: String // Discriminator for the usage variant; presumably "tokens" or "duration" — confirm against the API reference.
+        public let seconds: Double? // Optional and fractional-safe so payloads without (or with non-integer) `seconds` still decode.
+    }
+
 	/// Returned when an error occurs.
 	/// - Parameter eventId: The unique ID of the server event.
 	/// - Parameter error: Details of the error.
@@ -90,7 +95,7 @@ import MetaCodable
 		contentIndex: Int,
 		transcript: String,
 		logprobs: [LogProb]?,
-		usage: Response.Usage
+		usage: Usage
 	)
 
 	/// Returned when the text value of an input audio transcription content part is updated.
@@ -107,6 +112,7 @@ import MetaCodable
 		itemId: String,
 		contentIndex: Int,
 		delta: String,
+        obfuscation: String,
 		logprobs: [LogProb]?
 	)
 
@@ -536,7 +542,7 @@ extension ServerEvent: Identifiable {
 			case let .conversationItemDone(id, _, _): id
 			case let .conversationItemRetrieved(id, _): id
 			case let .conversationItemInputAudioTranscriptionCompleted(id, _, _, _, _, _): id
-			case let .conversationItemInputAudioTranscriptionDelta(id, _, _, _, _): id
+			case let .conversationItemInputAudioTranscriptionDelta(id, _, _, _, _, _): id
 			case let .conversationItemInputAudioTranscriptionSegment(id, _, _, _, _, _, _, _): id
 			case let .conversationItemInputAudioTranscriptionFailed(id, _, _, _): id
 			case let .conversationItemTruncated(id, _, _, _): id
diff --git a/Sources/Core/Models/Session.swift b/Sources/Core/Models/Session.swift
index e7ab4bc..4abfaf4 100644
--- a/Sources/Core/Models/Session.swift
+++ b/Sources/Core/Models/Session.swift
@@ -95,7 +95,7 @@ import HelperCoders
 			}
 
 			/// Configuration for turn detection
-			public struct TurnDetection: Codable, Equatable, Hashable, Sendable {
+			@Codable public struct TurnDetection: Equatable, Hashable, Sendable {
 				/// The type of turn detection.
 				public enum VAD: String, Codable, Equatable, Hashable, Sendable {
 					case server = "server_vad"
@@ -108,6 +108,7 @@ import HelperCoders
 				}
 
 				/// Whether or not to automatically generate a response when a VAD stop event occurs.
+                @Default(ifMissing: false)
 				public var createResponse: Bool
 
 				/// Used only for `semantic` mode. The eagerness of the model to respond.
@@ -262,13 +263,13 @@ import HelperCoders
 		public var input: Input
 
 		/// Configuration for output audio.
-		public var output: Output
+		public var output: Output?
 
 		/// Creates a new `Audio` configuration.
 		///
 		/// - Parameter input: Configuration for input audio.
-		/// - Parameter output: Configuration for output audio.
-		public init(input: Input, output: Output) {
+		/// - Parameter output: Configuration for output audio. Output is nil when this is a transcription session
+		public init(input: Input, output: Output?) {
 			self.input = input
 			self.output = output
 		}
@@ -289,8 +290,8 @@ import HelperCoders
 	///
 	/// The model can be instructed on response content and format, (e.g. "be extremely succinct", "act friendly", "here are examples of good responses") and on audio behavior (e.g. "talk quickly", "inject emotion into your voice", "laugh frequently").
 	///
-	/// The instructions are not guaranteed to be followed by the model, but they provide guidance to the model on the desired behavior.
-	public var instructions: String
+	/// The instructions are not guaranteed to be followed by the model, but they provide guidance to the model on the desired behavior. Instructions are nil in the event of a Transcription session
+	public var instructions: String?
 
 	/// Maximum number of output tokens for a single assistant response, inclusive of tool calls.
 	///
@@ -301,7 +302,7 @@ import HelperCoders
 	public var modalities: [Modality]?
 
 	/// The Realtime model used for this session.
-	public var model: Model
+	public var model: Model?
 
 	/// Reference to a prompt template and its variables.
 	public var prompt: Prompt?
diff --git a/Sources/UI/Conversation.swift b/Sources/UI/Conversation.swift
index 5820057..3d3b514 100644
--- a/Sources/UI/Conversation.swift
+++ b/Sources/UI/Conversation.swift
@@ -93,9 +93,9 @@ public final class Conversation: @unchecked Sendable {
 		try await client.connect(using: request)
 	}
 
-	public func connect(ephemeralKey: String, model: Model = .gptRealtime) async throws {
+	public func connect(ephemeralKey: String) async throws {
 		do {
-			try await connect(using: .webRTCConnectionRequest(ephemeralKey: ephemeralKey, model: model))
+			try await connect(using: .webRTCConnectionRequest(ephemeralKey: ephemeralKey))
 		} catch let error as WebRTCConnector.WebRTCError {
 			guard case .invalidEphemeralKey = error else { throw error }
 			throw ConversationError.invalidEphemeralKey
@@ -176,10 +176,32 @@ private extension Conversation {
 				if let sessionUpdateCallback { try updateSession(withChanges: sessionUpdateCallback) }
 			case let .sessionUpdated(_, session):
 				self.session = session
-			case let .conversationItemCreated(_, item, _):
+            case let .conversationItemAdded(_, item, nil):
+                entries.append(item)
+            case let .conversationItemAdded(_, item, previousItemId?):
+                if let entryIndex = entries.firstIndex(where: { $0.id == previousItemId }) {
+                    entries.insert(item, at: entryIndex + 1)
+                } else {
+                    entries.append(item)
+                }
+			case let .conversationItemCreated(_, item, nil):
 				entries.append(item)
+            case let .conversationItemCreated(_, item, previousItemId?):
+                if let entryIndex = entries.firstIndex(where: { $0.id == previousItemId }) {
+                    entries.insert(item, at: entryIndex + 1)
+                } else {
+                    entries.append(item)
+                }
 			case let .conversationItemDeleted(_, itemId):
 				entries.removeAll { $0.id == itemId }
+            case let .conversationItemInputAudioTranscriptionDelta(_, itemId, contentIndex, delta, _, _):
+                updateEvent(id: itemId) { message in
+                    guard case let .inputAudio(audio) = message.content[contentIndex] else { return }
+                    // Append the delta to the running transcript; parenthesized because `+` binds tighter than `??`.
+                    message.content[contentIndex] = .inputAudio(
+                        .init(audio: audio.audio, transcript: (audio.transcript ?? "") + delta)
+                    )
+                }
 			case let .conversationItemInputAudioTranscriptionCompleted(_, itemId, contentIndex, transcript, _, _):
 				updateEvent(id: itemId) { message in
 					guard case let .inputAudio(audio) = message.content[contentIndex] else { return }
diff --git a/Sources/WebRTC/Extensions/RealtimeAPI+WebRTC.swift b/Sources/WebRTC/Extensions/RealtimeAPI+WebRTC.swift
index ae5f3cd..a636ccb 100644
--- a/Sources/WebRTC/Extensions/RealtimeAPI+WebRTC.swift
+++ b/Sources/WebRTC/Extensions/RealtimeAPI+WebRTC.swift
@@ -11,7 +11,7 @@ public extension RealtimeAPI {
 	}
 
 	/// Connect to the OpenAI WebRTC Realtime API with the given authentication token and model.
-	static func webRTC(ephemeralKey: String, model: Model = .gptRealtime) async throws -> RealtimeAPI {
-		return try await webRTC(connectingTo: .webRTCConnectionRequest(ephemeralKey: ephemeralKey, model: model))
+	static func webRTC(ephemeralKey: String) async throws -> RealtimeAPI {
+		return try await webRTC(connectingTo: .webRTCConnectionRequest(ephemeralKey: ephemeralKey))
 	}
 }
diff --git a/Sources/WebRTC/Extensions/URLRequest+WebRTC.swift b/Sources/WebRTC/Extensions/URLRequest+WebRTC.swift
index 1335ea0..7fe9ed5 100644
--- a/Sources/WebRTC/Extensions/URLRequest+WebRTC.swift
+++ b/Sources/WebRTC/Extensions/URLRequest+WebRTC.swift
@@ -7,10 +7,8 @@ import FoundationNetworking
 fileprivate let baseURL = URL(string: "https://api.openai.com/v1/realtime/calls")!
 
 package extension URLRequest {
-	static func webRTCConnectionRequest(ephemeralKey: String, model: Model) -> URLRequest {
-		var request = URLRequest(url: baseURL.appending(queryItems: [
-			URLQueryItem(name: "model", value: model.rawValue),
-		]))
+	static func webRTCConnectionRequest(ephemeralKey: String) -> URLRequest {
+		var request = URLRequest(url: baseURL)
 
 		request.httpMethod = "POST"
 		request.setValue("Bearer \(ephemeralKey)", forHTTPHeaderField: "Authorization")
diff --git a/Sources/WebRTC/WebRTCConnector.swift b/Sources/WebRTC/WebRTCConnector.swift
index 3aa29df..35d8576 100644
--- a/Sources/WebRTC/WebRTCConnector.swift
+++ b/Sources/WebRTC/WebRTCConnector.swift
@@ -196,6 +196,7 @@ extension WebRTCConnector: LKRTCDataChannelDelegate {
 		do { try stream.yield(decoder.decode(ServerEvent.self, from: buffer.data)) }
 		catch {
 			print("Failed to decode server event: \(String(data: buffer.data, encoding: .utf8) ?? "<invalid utf8>")")
+            print("Error: \(error)")
 			stream.finish(throwing: error)
 		}
 	}

From ab461b8bc0d263ff446da6e2676df176d1a35c2f Mon Sep 17 00:00:00 2001
From: Justin Williams <jblwilliams@gmail.com>
Date: Wed, 29 Oct 2025 15:27:11 -0400
Subject: [PATCH 2/2] Remove hard-coded type parameter on Session. Can be
 realtime or transcription

---
 Sources/Core/Models/Session.swift | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/Sources/Core/Models/Session.swift b/Sources/Core/Models/Session.swift
index 4abfaf4..f0b9e34 100644
--- a/Sources/Core/Models/Session.swift
+++ b/Sources/Core/Models/Session.swift
@@ -276,7 +276,8 @@ import HelperCoders
 	}
 
 	/// The type of session to create.
-	public let type: String = "realtime"
+    /// Valid values are "realtime" or "transcription"
+	public let type: String
 
 	/// Unique identifier for the session
 	public var id: String?
@@ -318,8 +319,9 @@ import HelperCoders
 	/// Tools available to the model.
 	public var tools: [Tool]?
 
-	public init(id: String? = nil, audio: Audio, instructions: String, maxResponseOutputTokens: MaxResponseOutputTokens? = nil, modalities: [Modality]? = nil, model: Model, prompt: Prompt? = nil, temperature: Double? = nil, toolChoice: Tool.Choice? = nil, tools: [Tool]? = nil) {
+    public init(id: String? = nil, type: String, audio: Audio, instructions: String, maxResponseOutputTokens: MaxResponseOutputTokens? = nil, modalities: [Modality]? = nil, model: Model, prompt: Prompt? = nil, temperature: Double? = nil, toolChoice: Tool.Choice? = nil, tools: [Tool]? = nil) {
 		self.id = id
+        self.type = type
 		self.tools = tools
 		self.model = model
 		self.audio = audio