// extractPDF runs the external pdftotext tool on path and returns the text it
// writes to standard output, with leading and trailing whitespace trimmed.
//
// pdftotext must be installed (package: poppler-utils on Alpine/Debian,
// poppler on Homebrew).
//
// On failure the returned string is empty and the error wraps the one reported
// by exec.Cmd.Run, so callers can use errors.Is(err, exec.ErrNotFound) to
// detect a missing binary, or errors.As with *exec.ExitError to inspect the
// exit status. Anything pdftotext wrote to standard error is included in the
// error message.
func extractPDF(path string) (string, error) {
	// Per the pdftotext manual, "-q" suppresses its messages and a text-file
	// argument of "-" sends the extracted text to stdout.
	cmd := exec.Command("pdftotext", "-q", path, "-")

	var stdout, stderr bytes.Buffer
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr

	if err := cmd.Run(); err != nil {
		// Prefer the tool's own diagnostics when present, but always keep the
		// underlying error in the chain so callers can classify the failure.
		if msg := strings.TrimSpace(stderr.String()); msg != "" {
			return "", fmt.Errorf("pdftotext: %s: %w", msg, err)
		}
		return "", fmt.Errorf("pdftotext: %w", err)
	}
	return strings.TrimSpace(stdout.String()), nil
}