chunking and file output

This commit is contained in:
Erik Winter 2024-06-13 09:01:55 +02:00
parent 2861e5349d
commit 4383768203
2 changed files with 28 additions and 4 deletions

View File

@@ -1 +1 @@
test.md *.md

View File

@@ -8,8 +8,9 @@ import (
) )
var ( var (
inputFile = flag.String("i", "", "input file") inputFile = flag.String("i", "", "input file (markdown, or plain text)")
model = flag.String("m", "llama3", "model file") model = flag.String("m", "llama3", "llm model")
outputFile = flag.String("o", "", "output file")
) )
const ( const (
@@ -35,8 +36,22 @@ func main() {
os.Exit(1) os.Exit(1)
} }
chunks := make([]string, 0)
for _, par := range strings.Split(string(doc), "\n\n") {
last := len(chunks) - 1
switch {
case last == -1:
chunks = append(chunks, par)
case last >= 0 && len(par)+len(chunks[last]) > 500:
chunks = append(chunks, par)
default:
chunks[last] = fmt.Sprintf("%s\n\n%s", chunks[last], par)
}
}
fmt.Printf("translating %d chunks\n", len(chunks))
translated := make([]string, 0) translated := make([]string, 0)
for _, chunk := range strings.Split(string(doc), "\n\n") { for _, chunk := range chunks {
prompt := fmt.Sprintf("%s\n---\n%s", promptStart, chunk) prompt := fmt.Sprintf("%s\n---\n%s", promptStart, chunk)
res, err := ollama.Generate(*model, prompt) res, err := ollama.Generate(*model, prompt)
if err != nil { if err != nil {
@@ -47,5 +62,14 @@ func main() {
translated = append(translated, res) translated = append(translated, res)
} }
if *outputFile != "" {
if err := os.WriteFile(*outputFile, []byte(strings.Join(translated, "\n")), 0644); err != nil {
fmt.Println(err)
os.Exit(1)
}
fmt.Printf("\nfile written to %s\n", *outputFile)
os.Exit(0)
}
fmt.Printf("\n\n%s\n", strings.Join(translated, "\n")) fmt.Printf("\n\n%s\n", strings.Join(translated, "\n"))
} }