Files
2026-04-23 15:45:20 +02:00

40 lines
852 B
Go

// ingestion/internal/extract/extract.go
package extract
import (
"fmt"
"os"
"strings"
)
// Text returns the plain-text content of the file located at path.
//
// The file type is chosen from the extension, compared case-insensitively:
//   - .md and .txt files are read from disk and returned unchanged;
//   - .pdf files are handed to extractPDF (documented as using pdftotext).
//
// Any other extension, including a missing one, yields an
// "unsupported file extension" error. A failed read is wrapped with the
// offending path so that callers can still unwrap the underlying os error.
func Text(path string) (string, error) {
	ext := strings.ToLower(fileExt(path))

	// PDFs need a real extraction step; delegate right away.
	if ext == ".pdf" {
		return extractPDF(path)
	}

	// Everything that is not a plain-text passthrough format is rejected
	// before the file system is touched.
	if ext != ".md" && ext != ".txt" {
		return "", fmt.Errorf("unsupported file extension: %s", ext)
	}

	data, readErr := os.ReadFile(path)
	if readErr != nil {
		return "", fmt.Errorf("read %s: %w", path, readErr)
	}
	return string(data), nil
}
// fileExt returns the extension of the final element of path, including the
// leading dot and converted to lower case (e.g. "Report.PDF" -> ".pdf").
//
// Both '/' and '\\' are treated as path separators regardless of the host
// OS, so a dot that appears only in a directory name is never mistaken for
// an extension. It returns "" when the final element contains no dot.
func fileExt(path string) string {
	// Find the last byte that is either a dot or a separator. Only when
	// that byte is a dot does the final path element carry an extension;
	// hitting a separator first means the file name itself has none.
	i := strings.LastIndexAny(path, `./\`)
	if i < 0 || path[i] != '.' {
		return ""
	}
	return strings.ToLower(path[i:])
}