mattt/ollama-swift
A Swift client library for interacting with the Ollama API.
Requirements
- Swift 5.7+
- macOS 13+
- Ollama
Installation
Swift Package Manager
Add the following to your Package.swift file:
.package(url: "https://github.com/mattt/ollama-swift.git", from: "1.8.0")
Usage
[!NOTE] The tests and example code for this library use the llama3.2 model. Run the following command to download the model to run them yourself:
ollama pull llama3.2
Initializing the client
import Ollama
// Use the default client (http://localhost:11434)
let client = Client.default
// Or create a custom client
let customClient = Client(host: URL(string: "http://your-ollama-host:11434")!, userAgent: "MyApp/1.0")
Generating text
Generate text using a specified model:
do {
let response = try await client.generate(
model: "llama3.2",
prompt: "Tell me a joke about Swift programming.",
options: [
"temperature": 0.7,
"max_tokens": 100
],
keepAlive: .minutes(10) // Keep model loaded for 10 minutes
)
print(response.response)
} catch {
print("Error: \(error)")
}
Streaming text generation
Generate text in a streaming fashion to receive responses in real-time:
do {
let stream = try await client.generateStream(
model: "llama3.2",
prompt: "Tell me a joke about Swift programming.",
options: [
"temperature": 0.7,
"max_tokens": 100
]
)
var fullResponse = ""
for try await chunk in stream {
// Process each chunk of the response as it arrives
print(chunk.response, terminator: "")
fullResponse += chunk.response
}
print("\nFull response: \(fullResponse)")
} catch {
print("Error: \(error)")
}
Chatting with a model
Generate a chat completion:
do {
let response = try await client.chat(
model: "llama3.2",
messages: [
.system("You are a helpful assistant."),
.user("In which city is Apple Inc. located?")
],
keepAlive: .minutes(10) // Keep model loaded for 10 minutes
)
print(response.message.content)
} catch {
print("Error: \(error)")
}
Streaming chat responses
Stream chat responses to get real-time partial completions:
do {
let stream = try await client.chatStream(
model: "llama3.2",
messages: [
.system("You are a helpful assistant."),
.user("Write a short poem about Swift programming.")
]
)
var fullContent = ""
for try await chunk in stream {
// Process each chunk of the message as it arrives
if let content = chunk.message.content {
print(content, terminator: "")
fullContent += content
}
}
print("\nComplete poem: \(fullContent)")
} catch {
print("Error: \(error)")
}
You can also stream chat responses when using tools:
do {
let stream = try await client.chatStream(
model: "llama3.2",
messages: [
.system("You are a helpful assistant that can check the weather."),
.user("What's the weather like in Portland?")
],
tools: [weatherTool]
)
for try await chunk in stream {
// Check if the model is making tool calls
if let toolCalls = chunk.message.toolCalls, !toolCalls.isEmpty {
print("Model is requesting tool: \(toolCalls[0].function.name)")
}
// Print content from the message as it streams
if let content = chunk.message.content {
print(content, terminator: "")
}
// Check if this is the final chunk
if chunk.done {
print("\nResponse complete")
}
}
} catch {
print("Error: \(error)")
}
Using Structured Outputs
You can request structured outputs from models by specifying a format. Pass "json" to get back a JSON string, or specify a full JSON Schema:
// Simple JSON format
let response = try await client.chat(
model: "llama3.2",
messages: [.user("List 3 colors.")],
format: "json"
)
// Using JSON schema for more control
let schema: Value = [
"type": "object",
"properties": [
"colors": [
"type": "array",
"items": [
"type": "object",
"properties": [
"name": ["type": "string"],
"hex": ["type": "string"]
],
"required": ["name", "hex"]
]
]
],
"required": ["colors"]
]
let response = try await client.chat(
model: "llama3.2",
messages: [.user("List 3 colors with their hex codes.")],
format: schema
)
// The response will be a JSON object matching the schema:
// {
// "colors": [
// {"name": "papayawhip", "hex": "#FFEFD5"},
// {"name": "indigo", "hex": "#4B0082"},
// {"name": "navy", "hex": "#000080"}
// ]
// }
The format parameter works with both chat and generate methods.
Using Thinking Models
Some models support a "thinking" mode where they show their reasoning process before providing the final answer. This is particularly useful for complex reasoning tasks.
// Generate with thinking enabled
let response = try await client.generate(
model: "deepseek-r1:8b",
prompt: "What is 17 * 23? Show your work.",
think: true
)
print("Thinking: \(response.thinking ?? "None")")
print("Answer: \(response.response)")
You can also use thinking in chat conversations:
let response = try await client.chat(
model: "deepseek-r1:8b",
messages: [
.system("You are a helpful mathematician."),
.user("Calculate 9.9 + 9.11 and explain your reasoning.")
],
think: true
)
print("Thinking: \(response.message.thinking ?? "None")")
print("Response: \(response.message.content)")
[!TIP] You can check which models support thinking by examining their capabilities:
let modelInfo = try await client.showModel("deepseek-r1:8b")
if modelInfo.capabilities.contains(.thinking) {
    print("🧠 This model supports thinking!")
}
Managing Model Memory with Keep-Alive
You can control how long a model stays loaded in memory using the keepAlive parameter. This is useful for managing memory usage and response times.
// Use server default (typically 5 minutes)
let response = try await client.generate(
model: "llama3.2",
prompt: "Hello!"
// keepAlive defaults to .default
)
// Keep model loaded for 10 minutes
let response = try await client.generate(
model: "llama3.2",
prompt: "Hello!",
keepAlive: .minutes(10)
)
// Keep model loaded for 2 hours
let response = try await client.chat(
model: "llama3.2",
messages: [.user("Hello!")],
keepAlive: .hours(2)
)
// Keep model loaded for 30 seconds
let response = try await client.generate(
model: "llama3.2",
prompt: "Hello!",
keepAlive: .seconds(30)
)
// Keep model loaded indefinitely
let response = try await client.chat(
model: "llama3.2",
messages: [.user("Hello!")],
keepAlive: .forever
)
// Unload model immediately after response
let response = try await client.generate(
model: "llama3.2",
prompt: "Hello!",
keepAlive: .none
)
The keepAlive parameter accepts the following values:
- .default - Use the server's default keep-alive behavior (default if not specified)
- .none - Unload immediately after the request
- .seconds(Int) - Keep loaded for the specified number of seconds
- .minutes(Int) - Keep loaded for the specified number of minutes
- .hours(Int) - Keep loaded for the specified number of hours
- .forever - Keep loaded indefinitely
[!NOTE] Zero durations (e.g., .seconds(0)) are treated as .none (unload immediately). Negative durations are treated as .forever (keep loaded indefinitely).
Using Tools
Ollama supports tool calling with models, allowing models to perform complex tasks or interact with external services.
[!NOTE] Tool support requires a compatible model, such as llama3.2.
Creating a Tool
Define a tool by specifying its name, description, parameters, and implementation:
struct WeatherInput: Codable {
let city: String
}
struct WeatherOutput: Codable {
let temperature: Double
let conditions: String
}
let weatherTool = Tool<WeatherInput, WeatherOutput>(
name: "get_current_weather",
description: """
Get the current weather for a city,
with conditions ("sunny", "cloudy", etc.)
and temperature in °C.
""",
parameters: [
"city": [
"type": "string",
"description": "The city to get weather for"
]
],
required: ["city"]
) { input async throws -> WeatherOutput in
// Implement weather lookup logic here
return WeatherOutput(temperature: 18.5, conditions: "cloudy")
}
[!IMPORTANT] In version 1.3.0 and later, the parameters argument should contain only the properties object, not the full JSON schema of the tool.
For backward compatibility, passing a full schema in the parameters argument (with "type", "properties", and "required" fields) is still supported but deprecated and will emit a warning in debug builds.
<details>
<summary>Click to see code examples of old vs. new format</summary>
// ✅ New format
let weatherTool = Tool<WeatherInput, WeatherOutput>(
    name: "get_current_weather",
    description: "Get the current weather for a city",
    parameters: [
        "city": [
            "type": "string",
            "description": "The city to get weather for"
        ]
    ],
    required: ["city"]
) { /* implementation */ }

// ❌ Deprecated format (still works but not recommended)
let weatherTool = Tool<WeatherInput, WeatherOutput>(
    name: "get_current_weather",
    description: "Get the current weather for a city",
    parameters: [
        "type": "object",
        "properties": [
            "city": [
                "type": "string",
                "description": "The city to get weather for"
            ]
        ],
        "required": ["city"]
    ]
) { /* implementation */ }
</details>
Using Tools in Chat
Provide tools to the model during chat:
let messages: [Chat.Message] = [
.system("You are a helpful assistant that can check the weather."),
.user("What's the weather like in Portland?")
]
let response = try await client.chat(
model: "llama3.1",
messages: messages,
tools: [weatherTool]
)
// Handle tool calls in the response
if let toolCalls = response.message.toolCalls {
for toolCall in toolCalls {
print("Tool called: \(toolCall.function.name)")
print("Arguments: \(toolCall.function.arguments)")
}
}
Multi-turn Tool Conversations
Tools can be used in multi-turn conversations, where the model can use tool results to provide more detailed responses:
var messages: [Chat.Message] = [
.system("You are a helpful assistant that can convert colors."),
.user("What's the hex code for yellow?")
]
// First turn - model calls the tool
let response1 = try await client.chat(
model: "llama3.1",
messages: messages,
tools: [rgbToHexTool]
)
enum ToolError {
case invalidParameters
}
// Add tool response to conversation
if let toolCall = response1.message.toolCalls?.first {
// Parse the tool arguments
guard let args = toolCall.function.arguments,
let redValue = args["red"],
let greenValue = args["green"],
let blueValue = args["blue"],
let red = Double(redValue, strict: false),
let green = Double(greenValue, strict: false),
let blue = Double(blueValue, strict: false)
else {
throw ToolError.invalidParameters
}
let input = HexColorInput(
red: red,
green: green,
blue: blue
)
// Execute the tool with the input
let hexColor = try await rgbToHexTool(input)
// Add the tool result to the conversation
messages.append(.tool(hexColor))
}
// Continue conversation with tool result
messages.append(.user("What other colors are similar?"))
let response2 = try await client.chat(
model: "llama3.1",
messages: messages,
tools: [rgbToHexTool]
)
Generating embeddings
Generate embeddings for a given text:
do {
let response = try await client.embed(
model: "llama3.2",
input: "Here is an article about llamas..."
)
print("Embeddings: \(response.embeddings)")
} catch {
print("Error: \(error)")
}
Generate embeddings for multiple texts in a single batch:
do {
let texts = [
"First article about llamas...",
"Second article about alpacas...",
"Third article about vicuñas..."
]
let response = try await client.embed(
model: "llama3.2",
inputs: texts
)
// Access embeddings for each input
for (index, embedding) in response.embeddings.rawValue.enumerated() {
print("Embedding \(index): \(embedding.count) dimensions")
}
} catch {
print("Error: \(error)")
}
Managing models
Listing models
List available models:
do {
let models = try await client.listModels()
for model in models {
print("Model: \(model.name), Modified: \(model.modifiedAt)")
}
} catch {
print("Error: \(error)")
}
Retrieving model information
Get detailed information about a specific model:
do {
let modelInfo = try await client.showModel("llama3.2")
print("Modelfile: \(modelInfo.modelfile)")
print("Parameters: \(modelInfo.parameters)")
print("Template: \(modelInfo.template)")
} catch {
print("Error: \(error)")
}
Pulling a model
Download a model from the Ollama library:
do {
let success = try await client.pullModel("llama3.2")
if success {
print("Model successfully pulled")
} else {
print("Failed to pull model")
}
} catch {
print("Error: \(error)")
}
Pushing a model
do {
let success = try await client.pushModel("mynamespace/mymodel:latest")
if success {
print("Model successfully pushed")
} else {
print("Failed to push model")
}
} catch {
print("Error: \(error)")
}
License
This project is available under the MIT license. See the LICENSE file for more info.
Package Metadata
Repository: mattt/ollama-swift
Default branch: main
README: README.md