AgapovOne/swift-lexbor
C lexbor html parser brought to Swift
Quick Start
import SwiftLexbor
let doc = SwiftLexbor.parseFragment("<p>Hello <b>world</b></p>")
for node in doc.children {
switch node {
case .element(let el): print(el.tagName, el.textContent)
case .text(let text): print(text)
case .comment: break
}
}Installation
Add to your Package.swift:
dependencies: [
.package(url: "https://github.com/AgapovOne/swift-lexbor.git", from: "0.1.0"),
],
targets: [
.target(name: "YourTarget", dependencies: [
.product(name: "SwiftLexbor", package: "swift-lexbor"),
]),
]<details> <summary>Direct access to C API</summary>
If you need the raw lexbor C API without the Swift wrapper:
dependencies: [
.product(name: "CLexbor", package: "swift-lexbor"),
]import CLexbor
let doc = lxb_html_document_create()!
defer { _ = lxb_html_document_destroy(doc) }
let html = "<p>Hello</p>"
let bytes = Array(html.utf8)
bytes.withUnsafeBufferPointer { buffer in
_ = lxb_html_document_parse(doc, buffer.baseAddress, buffer.count)
}</details>
Usage
Parsing
// Fragment — no html/head/body wrappers
let doc = SwiftLexbor.parseFragment("<div><p>text</p></div>")
// Full document — includes html/head/body
let fullDoc = SwiftLexbor.parse("<html><body><p>text</p></body></html>")Visitor Pattern
struct TextExtractor: HTMLVisitor {
typealias Result = String?
func visitText(_ text: String) -> String? { text }
func visitElement(_ element: HTMLElement) -> String? { element.textContent }
}
let texts = doc.accept(visitor: TextExtractor())<details> <summary>Full HTMLVisitor protocol</summary>
protocol HTMLVisitor {
associatedtype Result
func visitHeading(_ element: HTMLElement, level: Int) -> Result
func visitParagraph(_ element: HTMLElement) -> Result
func visitLink(_ element: HTMLElement, href: String?) -> Result
func visitList(_ element: HTMLElement, ordered: Bool) -> Result
func visitListItem(_ element: HTMLElement) -> Result
func visitBlockquote(_ element: HTMLElement) -> Result
func visitCodeBlock(_ element: HTMLElement) -> Result
func visitTable(_ element: HTMLElement) -> Result
func visitBold(_ element: HTMLElement) -> Result
func visitItalic(_ element: HTMLElement) -> Result
func visitCode(_ element: HTMLElement) -> Result
func visitUnderline(_ element: HTMLElement) -> Result
func visitStrikethrough(_ element: HTMLElement) -> Result
func visitSubscript(_ element: HTMLElement) -> Result
func visitSuperscript(_ element: HTMLElement) -> Result
func visitImage(_ element: HTMLElement, src: String?, alt: String?) -> Result
func visitLineBreak() -> Result
func visitHorizontalRule() -> Result
func visitText(_ text: String) -> Result
func visitComment(_ text: String) -> Result
func visitElement(_ element: HTMLElement) -> Result
}All methods have default implementations. visitElement is the catch-all fallback. Semantic methods (visitHeading, visitParagraph, etc.) delegate to visitElement by default.
Inline formatting methods: visitBold (b/strong), visitItalic (i/em), visitCode (code), visitUnderline (u/ins), visitStrikethrough (s/del/strike), visitSubscript (sub), visitSuperscript (sup), visitImage (img), visitLineBreak (br).
</details>
Serialization
let doc = SwiftLexbor.parseFragment("<p>Hello <b>world</b></p>")
let html = HTMLSerializer.serialize(doc) // "<p>Hello <b>world</b></p>"Handles void elements, boolean attributes, HTML entity escaping, and sorted attributes for deterministic output.
Sequence Conformance
HTMLDocument and HTMLElement conform to Sequence, so you can iterate directly:
let doc = SwiftLexbor.parseFragment("<p>one</p><p>two</p>")
for node in doc {
if case .element(let el) = node {
print(el.tagName) // "p", "p"
}
}
// Use map, filter, first(where:), etc.
let tags = doc.compactMap { node -> String? in
guard case .element(let el) = node else { return nil }
return el.tagName
}<details> <summary>AttributedString visitor example</summary>
Converts HTML to AttributedString. Copy and customize for your needs:
import Foundation
struct AttributedStringBuilder: HTMLVisitor {
typealias Result = AttributedString
func visitText(_ text: String) -> AttributedString {
AttributedString(text)
}
func visitElement(_ element: HTMLElement) -> AttributedString {
element.children.map { $0.accept(visitor: self) }.reduce(AttributedString(), +)
}
func visitBold(_ element: HTMLElement) -> AttributedString {
var result = visitElement(element)
result.inlinePresentationIntent = .stronglyEmphasized
return result
}
func visitItalic(_ element: HTMLElement) -> AttributedString {
var result = visitElement(element)
result.inlinePresentationIntent = .emphasized
return result
}
func visitCode(_ element: HTMLElement) -> AttributedString {
var result = visitElement(element)
result.inlinePresentationIntent = .code
return result
}
func visitStrikethrough(_ element: HTMLElement) -> AttributedString {
var result = visitElement(element)
result.strikethroughStyle = .single
return result
}
func visitLink(_ element: HTMLElement, href: String?) -> AttributedString {
var result = visitElement(element)
if let href, let url = URL(string: href) {
result.link = url
}
return result
}
func visitLineBreak() -> AttributedString {
AttributedString("\n")
}
func visitParagraph(_ element: HTMLElement) -> AttributedString {
visitElement(element) + AttributedString("\n\n")
}
}
// Usage:
let doc = SwiftLexbor.parseFragment("<p>Hello <b>world</b> and <a href=\"https://example.com\">link</a></p>")
let builder = AttributedStringBuilder()
let attributed = doc.children.map { $0.accept(visitor: builder) }.reduce(AttributedString(), +)</details>
<details> <summary>SwiftUI example</summary>
Parse HTML once, render the AST in SwiftUI.
let document = SwiftLexbor.parseFragment(html)
HTMLDocView(document)Full working example with headings, blockquotes, inline formatting, and tappable links → Example/
</details>
<details> <summary>AST types reference</summary>
All types are Equatable, Hashable, and Sendable.
struct HTMLDocument { let children: [HTMLNode] }
enum HTMLNode {
case element(HTMLElement)
case text(String)
case comment(String)
}
struct HTMLElement {
let tagName: String
let attributes: [String: String]
let children: [HTMLNode]
var textContent: String { get }
}</details>
Parser Behavior
script,style,templatetags are skipped- HTML entities are decoded (
&->&) - Invalid HTML is handled via lexbor's error recovery
- Boolean attributes have empty string value (
disabled->""). UsehasAttribute(_:)to check presence.
Benchmarks
Run locally:
swift run --package-path Benchmarks -c releaseSee Benchmarks/ for full results and methodology.
Requirements
- Swift 5.10+
- iOS 13+ / macOS 10.15+
License
lexbor is licensed under the Apache License 2.0. The Swift wrapper follows the same license.
Package Metadata
Repository: AgapovOne/swift-lexbor
Stars: 3
Forks: 0
Open issues: 0
Default branch: main
Primary language: c
License: Apache-2.0
Topics: html, lexbor, parser, swift
README: README.md