chunking and file output
This commit is contained in:
parent
2861e5349d
commit
4383768203
|
@ -1 +1 @@
|
|||
test.md
|
||||
*.md
|
|
@ -8,8 +8,9 @@ import (
|
|||
)
|
||||
|
||||
var (
|
||||
inputFile = flag.String("i", "", "input file")
|
||||
model = flag.String("m", "llama3", "model file")
|
||||
inputFile = flag.String("i", "", "input file (markdown, or plain text)")
|
||||
model = flag.String("m", "llama3", "llm model")
|
||||
outputFile = flag.String("o", "", "output file")
|
||||
)
|
||||
|
||||
const (
|
||||
|
@ -35,8 +36,22 @@ func main() {
|
|||
os.Exit(1)
|
||||
}
|
||||
|
||||
chunks := make([]string, 0)
|
||||
for _, par := range strings.Split(string(doc), "\n\n") {
|
||||
last := len(chunks) - 1
|
||||
switch {
|
||||
case last == -1:
|
||||
chunks = append(chunks, par)
|
||||
case last >= 0 && len(par)+len(chunks[last]) > 500:
|
||||
chunks = append(chunks, par)
|
||||
default:
|
||||
chunks[last] = fmt.Sprintf("%s\n\n%s", chunks[last], par)
|
||||
}
|
||||
}
|
||||
|
||||
fmt.Printf("translating %d chunks\n", len(chunks))
|
||||
translated := make([]string, 0)
|
||||
for _, chunk := range strings.Split(string(doc), "\n\n") {
|
||||
for _, chunk := range chunks {
|
||||
prompt := fmt.Sprintf("%s\n---\n%s", promptStart, chunk)
|
||||
res, err := ollama.Generate(*model, prompt)
|
||||
if err != nil {
|
||||
|
@ -47,5 +62,14 @@ func main() {
|
|||
translated = append(translated, res)
|
||||
}
|
||||
|
||||
if *outputFile != "" {
|
||||
if err := os.WriteFile(*outputFile, []byte(strings.Join(translated, "\n")), 0644); err != nil {
|
||||
fmt.Println(err)
|
||||
os.Exit(1)
|
||||
}
|
||||
fmt.Printf("\nfile written to %s\n", *outputFile)
|
||||
os.Exit(0)
|
||||
}
|
||||
|
||||
fmt.Printf("\n\n%s\n", strings.Join(translated, "\n"))
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue