# ollama-swift

A Swift client library for interacting with the [Ollama](https://ollama.com) API.
## Requirements

- Swift 5.7+
- macOS 13+
- Ollama

## Installation

### Swift Package Manager

Add the following to your `Package.swift` file:
```swift
.package(url: "https://github.com/mattt/ollama-swift.git", from: "1.8.0")
```
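Then add `Ollama` as a dependency of your target. A minimal sketch, assuming the product is named `Ollama` (matching the module you import) and using a hypothetical target name:

```swift
.target(
    name: "MyApp", // your target
    dependencies: [
        .product(name: "Ollama", package: "ollama-swift")
    ]
)
```

## Usage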
> [!NOTE]
> The tests and example code for this library use the
> [llama3.2](https://ollama.com/library/llama3.2) model.
> To run them yourself, first download the model with the following command:
>
> ```
> ollama pull llama3.2
> ```
### Initializing the client
```swift
import Ollama

// Use the default client (http://localhost:11434)
let client = Client.default

// Or create a custom client
let customClient = Client(
    host: URL(string: "http://your-ollama-host:11434")!,
    userAgent: "MyApp/1.0"
)
```
### Generating text
Generate text using a specified model:
```swift
do {
    let response = try await client.generate(
        model: "llama3.2",
        prompt: "Tell me a joke about Swift programming.",
        options: [
            "temperature": 0.7,
            "max_tokens": 100
        ],
        keepAlive: .minutes(10) // Keep model loaded for 10 minutes
    )
    print(response.response)
} catch {
    print("Error: \(error)")
}
```
#### Streaming text generation
Generate text in a streaming fashion to receive responses in real time:
```swift
do {
    let stream = try await client.generateStream(
        model: "llama3.2",
        prompt: "Tell me a joke about Swift programming.",
        options: [
            "temperature": 0.7,
            "max_tokens": 100
        ]
    )

    var fullResponse = ""
    for try await chunk in stream {
        // Process each chunk of the response as it arrives
        print(chunk.response, terminator: "")
        fullResponse += chunk.response
    }

    print("\nFull response: \(fullResponse)")
} catch {
    print("Error: \(error)")
}
```
### Chatting with a model
Generate a chat completion:
```swift
do {
    let response = try await client.chat(
        model: "llama3.2",
        messages: [
            .system("You are a helpful assistant."),
            .user("In which city is Apple Inc. located?")
        ],
        keepAlive: .minutes(10) // Keep model loaded for 10 minutes
    )
    print(response.message.content)
} catch {
    print("Error: \(error)")
}
```
#### Streaming chat responses
Stream chat responses to get real-time partial completions:
```swift
do {
    let stream = try await client.chatStream(
        model: "llama3.2",
        messages: [
            .system("You are a helpful assistant."),
            .user("Write a short poem about Swift programming.")
        ]
    )

    var fullContent = ""
    for try await chunk in stream {
        // Process each chunk of the message as it arrives
        if let content = chunk.message.content {
            print(content, terminator: "")
            fullContent += content
        }
    }

    print("\nComplete poem: \(fullContent)")
} catch {
    print("Error: \(error)")
}
```
You can also stream chat responses when using tools:
```swift
do {
    let stream = try await client.chatStream(
        model: "llama3.2",
        messages: [
            .system("You are a helpful assistant that can check the weather."),
            .user("What's the weather like in Portland?")
        ],
        tools: [weatherTool]
    )

    for try await chunk in stream {
        // Check if the model is making tool calls
        if let toolCalls = chunk.message.toolCalls, !toolCalls.isEmpty {
            print("Model is requesting tool: \(toolCalls[0].function.name)")
        }

        // Print content from the message as it streams
        if let content = chunk.message.content {
            print(content, terminator: "")
        }

        // Check if this is the final chunk
        if chunk.done {
            print("\nResponse complete")
        }
    }
} catch {
    print("Error: \(error)")
}
```
### Using Structured Outputs
You can request structured outputs from models by specifying a format.
Pass `"json"` to get back a JSON string,
or specify a full [JSON Schema](https://json-schema.org):
```swift
// Simple JSON format
let jsonResponse = try await client.chat(
    model: "llama3.2",
    messages: [.user("List 3 colors.")],
    format: "json"
)

// Using a JSON Schema for more control
let schema: Value = [
    "type": "object",
    "properties": [
        "colors": [
            "type": "array",
            "items": [
                "type": "object",
                "properties": [
                    "name": ["type": "string"],
                    "hex": ["type": "string"]
                ],
                "required": ["name", "hex"]
            ]
        ]
    ],
    "required": ["colors"]
]

let response = try await client.chat(
    model: "llama3.2",
    messages: [.user("List 3 colors with their hex codes.")],
    format: schema
)

// The response will be a JSON object matching the schema:
// {
//   "colors": [
//     {"name": "papayawhip", "hex": "#FFEFD5"},
//     {"name": "indigo", "hex": "#4B0082"},
//     {"name": "navy", "hex": "#000080"}
//   ]
// }
```
The `format` parameter works with both the `chat` and `generate` methods.
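For example, the `schema` value defined above can be passed to `generate` the same way (a minimal sketch; the variable name is illustrative):

```swift
let generated = try await client.generate(
    model: "llama3.2",
    prompt: "List 3 colors with their hex codes.",
    format: schema
)
print(generated.response) // a JSON string matching the schema
```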
### Using Thinking Models
Some models support a "thinking" mode
where they show their reasoning process before providing the final answer.
This is particularly useful for complex reasoning tasks.
```swift
// Generate with thinking enabled
let response = try await client.generate(
    model: "deepseek-r1:8b",
    prompt: "What is 17 * 23? Show your work.",
    think: true
)

print("Thinking: \(response.thinking ?? "None")")
print("Answer: \(response.response)")
```
You can also use thinking in chat conversations:
```swift
let response = try await client.chat(
    model: "deepseek-r1:8b",
    messages: [
        .system("You are a helpful mathematician."),
        .user("Calculate 9.9 + 9.11 and explain your reasoning.")
    ],
    think: true
)

print("Thinking: \(response.message.thinking ?? "None")")
print("Response: \(response.message.content)")
```
> [!TIP]
> You can check which models support thinking by examining their capabilities:
> ```swift
> let modelInfo = try await client.showModel("deepseek-r1:8b")
> if modelInfo.capabilities.contains(.thinking) {
> print("🧠 This model supports thinking!")
> }
> ```
### Managing Model Memory with Keep-Alive
You can control how long a model stays loaded in memory using the `keepAlive` parameter. This is useful for managing memory usage and response times.
```swift
// Use server default (typically 5 minutes)
let response = try await client.generate(
    model: "llama3.2",
    prompt: "Hello!"
    // keepAlive defaults to .default
)

// Keep model loaded for 10 minutes
let response = try await client.generate(
    model: "llama3.2",
    prompt: "Hello!",
    keepAlive: .minutes(10)
)

// Keep model loaded for 2 hours
let response = try await client.chat(
    model: "llama3.2",
    messages: [.user("Hello!")],
    keepAlive: .hours(2)
)

// Keep model loaded for 30 seconds
let response = try await client.generate(
    model: "llama3.2",
    prompt: "Hello!",
    keepAlive: .seconds(30)
)

// Keep model loaded indefinitely
let response = try await client.chat(
    model: "llama3.2",
    messages: [.user("Hello!")],
    keepAlive: .forever
)

// Unload model immediately after response
let response = try await client.generate(
    model: "llama3.2",
    prompt: "Hello!",
    keepAlive: .none
)
```
- **`.default`** - Use the server's default keep-alive behavior (default if not specified)
- **`.none`** - Unload immediately after the request
- **`.seconds(Int)`** - Keep loaded for the specified number of seconds
- **`.minutes(Int)`** - Keep loaded for the specified number of minutes
- **`.hours(Int)`** - Keep loaded for the specified number of hours
- **`.forever`** - Keep loaded indefinitely
> [!NOTE]
> Zero durations (e.g., `.seconds(0)`) are treated as `.none` (unload immediately).
> Negative durations are treated as `.forever` (keep loaded indefinitely).
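For example, a quick sketch of those equivalences:

```swift
// .seconds(0) is treated as .none: the model is unloaded immediately
let r1 = try await client.generate(
    model: "llama3.2",
    prompt: "Hello!",
    keepAlive: .seconds(0)
)

// A negative duration is treated as .forever: the model stays loaded
let r2 = try await client.generate(
    model: "llama3.2",
    prompt: "Hello!",
    keepAlive: .minutes(-1)
)
```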
### Using Tools
Ollama supports tool calling,
which lets models perform complex tasks or interact with external services.
> [!NOTE]
> Tool support requires a [compatible model](https://ollama.com/search?c=tools),
> such as llama3.2.
#### Creating a Tool
Define a tool by specifying its name, description, parameters, and implementation:
```swift
struct WeatherInput: Codable {
    let city: String
}

struct WeatherOutput: Codable {
    let temperature: Double
    let conditions: String
}

let weatherTool = Tool<WeatherInput, WeatherOutput>(
    name: "get_current_weather",
    description: """
    Get the current weather for a city,
    with conditions ("sunny", "cloudy", etc.)
    and temperature in °C.
    """,
    parameters: [
        "city": [
            "type": "string",
            "description": "The city to get weather for"
        ]
    ],
    required: ["city"]
) { input async throws -> WeatherOutput in
    // Implement weather lookup logic here
    return WeatherOutput(temperature: 18.5, conditions: "cloudy")
}
```
> [!IMPORTANT]
> In version 1.3.0 and later,
> the `parameters` argument should contain only the properties object,
> not the full JSON schema of the tool.
>
> For backward compatibility,
> passing a full schema in the `parameters` argument
> (with `"type"`, `"properties"`, and `"required"` fields)
> is still supported but deprecated and will emit a warning in debug builds.
>
> <details>
> <summary>Click to see code examples of old vs. new format</summary>
>
> ```swift
> // ✅ New format
> let weatherTool = Tool<WeatherInput, WeatherOutput>(
>     name: "get_current_weather",
>     description: "Get the current weather for a city",
>     parameters: [
>         "city": [
>             "type": "string",
>             "description": "The city to get weather for"
>         ]
>     ],
>     required: ["city"]
> ) { /* implementation */ }
>
> // ❌ Deprecated format (still works but not recommended)
> let weatherTool = Tool<WeatherInput, WeatherOutput>(
>     name: "get_current_weather",
>     description: "Get the current weather for a city",
>     parameters: [
>         "type": "object",
>         "properties": [
>             "city": [
>                 "type": "string",
>                 "description": "The city to get weather for"
>             ]
>         ],
>         "required": ["city"]
>     ]
> ) { /* implementation */ }
> ```
> </details>
#### Using Tools in Chat
Provide tools to the model during chat:
```swift
let messages: [Chat.Message] = [
    .system("You are a helpful assistant that can check the weather."),
    .user("What's the weather like in Portland?")
]

let response = try await client.chat(
    model: "llama3.1",
    messages: messages,
    tools: [weatherTool]
)

// Handle tool calls in the response
if let toolCalls = response.message.toolCalls {
    for toolCall in toolCalls {
        print("Tool called: \(toolCall.function.name)")
        print("Arguments: \(toolCall.function.arguments)")
    }
}
```
#### Multi-turn Tool Conversations
Tools can be used in multi-turn conversations, where the model can use tool results to provide more detailed responses:
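The example below assumes an `rgbToHexTool` defined along the same lines as the weather tool above. Here is a minimal sketch of such a tool (hypothetical; the name and value ranges are illustrative, not part of the library):

```swift
struct HexColorInput: Codable {
    let red: Double
    let green: Double
    let blue: Double
}

// Hypothetical tool: converts RGB components (0-255) to a "#RRGGBB" string
let rgbToHexTool = Tool<HexColorInput, String>(
    name: "rgb_to_hex",
    description: "Convert RGB component values (0-255) to a hex color string.",
    parameters: [
        "red": ["type": "number", "description": "Red component (0-255)"],
        "green": ["type": "number", "description": "Green component (0-255)"],
        "blue": ["type": "number", "description": "Blue component (0-255)"]
    ],
    required: ["red", "green", "blue"]
) { input async throws -> String in
    // Clamp each component to 0-255, then format as two uppercase hex digits
    let hex = [input.red, input.green, input.blue]
        .map { max(0, min(255, Int($0))) }
        .map { String($0, radix: 16, uppercase: true) }
        .map { $0.count == 1 ? "0" + $0 : $0 }
        .joined()
    return "#" + hex
}
```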
```swift
enum ToolError: Error {
    case invalidParameters
}

var messages: [Chat.Message] = [
    .system("You are a helpful assistant that can convert colors."),
    .user("What's the hex code for yellow?")
]

// First turn - model calls the tool
let response1 = try await client.chat(
    model: "llama3.1",
    messages: messages,
    tools: [rgbToHexTool]
)

// Add tool response to conversation
if let toolCall = response1.message.toolCalls?.first {
    // Parse the tool arguments
    guard let args = toolCall.function.arguments,
          let redValue = args["red"],
          let greenValue = args["green"],
          let blueValue = args["blue"],
          let red = Double(redValue, strict: false),
          let green = Double(greenValue, strict: false),
          let blue = Double(blueValue, strict: false)
    else {
        throw ToolError.invalidParameters
    }

    let input = HexColorInput(
        red: red,
        green: green,
        blue: blue
    )

    // Execute the tool with the input
    let hexColor = try await rgbToHexTool(input)

    // Add the tool result to the conversation
    messages.append(.tool(hexColor))
}

// Continue conversation with tool result
messages.append(.user("What other colors are similar?"))

let response2 = try await client.chat(
    model: "llama3.1",
    messages: messages,
    tools: [rgbToHexTool]
)
```
### Generating embeddings
Generate embeddings for a given text:
```swift
do {
    let response = try await client.embed(
        model: "llama3.2",
        input: "Here is an article about llamas..."
    )
    print("Embeddings: \(response.embeddings)")
} catch {
    print("Error: \(error)")
}
```
Generate embeddings for multiple texts in a single batch:
```swift
do {
    let texts = [
        "First article about llamas...",
        "Second article about alpacas...",
        "Third article about vicuñas..."
    ]

    let response = try await client.embed(
        model: "llama3.2",
        inputs: texts
    )

    // Access embeddings for each input
    for (index, embedding) in response.embeddings.rawValue.enumerated() {
        print("Embedding \(index): \(embedding.count) dimensions")
    }
} catch {
    print("Error: \(error)")
}
```
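Embeddings are vectors of numbers that can be compared for semantic similarity. As a quick sketch, here is a cosine-similarity helper (assuming the embeddings are exposed as `[Double]` arrays; this helper is not part of the library):

```swift
// Hypothetical helper: cosine similarity between two equal-length vectors
func cosineSimilarity(_ a: [Double], _ b: [Double]) -> Double {
    precondition(a.count == b.count, "Embeddings must have the same dimension")
    let dot = zip(a, b).map(*).reduce(0, +)
    let normA = a.map { $0 * $0 }.reduce(0, +).squareRoot()
    let normB = b.map { $0 * $0 }.reduce(0, +).squareRoot()
    return dot / (normA * normB)
}
```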
### Managing models
#### Listing models
List available models:
```swift
do {
    let models = try await client.listModels()
    for model in models {
        print("Model: \(model.name), Modified: \(model.modifiedAt)")
    }
} catch {
    print("Error: \(error)")
}
```
#### Retrieving model information
Get detailed information about a specific model:
```swift
do {
    let modelInfo = try await client.showModel("llama3.2")
    print("Modelfile: \(modelInfo.modelfile)")
    print("Parameters: \(modelInfo.parameters)")
    print("Template: \(modelInfo.template)")
} catch {
    print("Error: \(error)")
}
```
#### Pulling a model
Download a model from the Ollama library:
```swift
do {
    let success = try await client.pullModel("llama3.2")
    if success {
        print("Model successfully pulled")
    } else {
        print("Failed to pull model")
    }
} catch {
    print("Error: \(error)")
}
```
#### Pushing a model
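Upload a model to the Ollama library: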
```swift
do {
    let success = try await client.pushModel("mynamespace/mymodel:latest")
    if success {
        print("Model successfully pushed")
    } else {
        print("Failed to push model")
    }
} catch {
    print("Error: \(error)")
}
```

## License
This project is available under the MIT license. See the LICENSE file for more info.