diff --git a/main.go b/main.go
index dae45b6..a4abd41 100644
--- a/main.go
+++ b/main.go
@@ -4,6 +4,7 @@ import (
 	"fmt"
 	"os"
 
+	"go-mod.ewintr.nl/henk/llm"
 	"go-mod.ewintr.nl/henk/parse"
 )
 
@@ -17,6 +18,19 @@ func main() {
 	}
 	fmt.Printf("%s\n", project.Tree())
 
+	f, err := parse.NewFile("./llm/memory.go")
+	if err != nil {
+		fmt.Println(err)
+		os.Exit(1)
+	}
+	ollamaClient := llm.NewOllama("http://192.168.1.12:11434", "nomic-embed-text:latest", "qwen2.5-coder:32b-instruct-q8_0")
+	short, long, err := parse.Describe(f, ollamaClient)
+	if err != nil {
+		fmt.Println(err)
+		os.Exit(1)
+	}
+	fmt.Printf("short: %s\n\nlong: %s\n", short, long)
+
 // err := structure.ProcessGoFile(filePath)
 // if err != nil {
 // fmt.Println(err)
@@ -28,7 +42,6 @@ func main() {
 // if err != nil {
 // log.Fatalf("Error walking the path: %v\n", err)
 // }
-// ollamaClient := llm.NewOllama("http://192.168.1.12:11434", "nomic-embed-text:latest", "qwen2.5-coder:32b-instruct-q8_0")
 
 // response, err := ollamaClient.Complete("You are a nice person.", "Say Hi!")
 // if err != nil {
diff --git a/parse/describe.go b/parse/describe.go
new file mode 100644
index 0000000..d9879b3
--- /dev/null
+++ b/parse/describe.go
@@ -0,0 +1,79 @@
+package parse
+
+import (
+	"encoding/json"
+	"fmt"
+
+	"go-mod.ewintr.nl/henk/llm"
+)
+
+const (
+	// system is the system prompt sent along with every describe request.
+	system = "You are an expert in software development and can understand and summarize any file one might encounter in a software project"
+
+	// schema is the JSON schema handed to the client together with the
+	// prompt. Its property names match the JSON tags on DescribeResponse,
+	// and both summaries are listed as required.
+	schema = `{
+  "type": "object",
+  "properties": {
+    "sentenceSummary": {
+      "type": "string"
+    },
+    "paragraphSummary": {
+      "type": "string"
+    }
+  },
+  "required": ["sentenceSummary", "paragraphSummary"]
+}`
+
+	// promptFmtSource is the prompt template for source files. The first verb
+	// receives the file name and the second the file content.
+	promptFmtSource = `The following is the content of a file with Go source code. Give two summaries for it:
+- One simple sentence that describes the source code functionality in the file
+- One paragraph that also describes the source code functionality, but that goes a little more in depth
+
+Don't explain that it is source code, or that it is Go. Focus on conveying the functionality that the code implements.
+
+This is the file %s:
+
+---
+
+%s
+
+---
+
+Respond in JSON.`
+)
+
+// DescribeResponse is the JSON object the model is asked to produce; its
+// fields correspond to the properties declared in schema.
+type DescribeResponse struct {
+	Sentence  string `json:"sentenceSummary"`
+	Paragraph string `json:"paragraphSummary"`
+}
+
+// Describe asks the model behind client for two summaries of file and returns
+// them as (sentence, paragraph).
+//
+// Files flagged as binary have no content to summarize, so they are rejected
+// with an error wrapping ErrNotATextFile. Failures of the completion request
+// or of decoding its JSON response are returned wrapped as well.
+func Describe(file *File, client *llm.Ollama) (string, string, error) {
+	if file.Binary {
+		return "", "", fmt.Errorf("could not describe %s: %w", file.Name(), ErrNotATextFile)
+	}
+
+	prompt := fmt.Sprintf(promptFmtSource, file.Name(), file.Content)
+	res, err := client.Complete(system, prompt, []byte(schema))
+	if err != nil {
+		return "", "", fmt.Errorf("could not complete request: %w", err)
+	}
+
+	var sum DescribeResponse
+	if err := json.Unmarshal([]byte(res), &sum); err != nil {
+		return "", "", fmt.Errorf("could not unmarshal response: %w", err)
+	}
+
+	return sum.Sentence, sum.Paragraph, nil
+}
diff --git a/parse/tree.go b/parse/tree.go
index ac6ee31..8619e5b 100644
--- a/parse/tree.go
+++ b/parse/tree.go
@@ -1,10 +1,17 @@
 package parse
 
 import (
+	"errors"
 	"fmt"
 	"os"
 	"path/filepath"
 	"strings"
+	"unicode/utf8"
+)
+
+var (
+	// ErrNotATextFile is returned when a file's content is not valid UTF-8.
+	ErrNotATextFile = errors.New("not a text file")
 )
 
 type ElementType string
@@ -16,8 +23,10 @@ type Element struct {
 }
 
 type File struct {
+	Binary      bool
 	Description string
 	Path        string
+	Content     string
 	Elements    []Element
 }
 
@@ -95,10 +104,21 @@ func (p *Project) Tree() string {
 }
 
+// NewFile builds a File for path and loads its content. Files that are not
+// valid UTF-8 text are flagged as Binary and keep an empty Content; any
+// other read error is returned.
 func NewFile(path string) (*File, error) {
-	// fmt.Println(path)
 	file := &File{
 		Path: path,
 	}
+	txt, err := readTextFile(path)
+	switch {
+	case errors.Is(err, ErrNotATextFile):
+		file.Binary = true
+	case err != nil:
+		return nil, err
+	default:
+		file.Content = txt
+	}
 
 	return file, nil
 }
@@ -154,3 +174,21 @@ func (d *Directory) Tree(indent int) []string {
 
 	return res
 }
+
+// readTextFile returns the content of the file at path as a string.
+//
+// Content that is not valid UTF-8 is rejected with ErrNotATextFile. A
+// correctly encoded U+FFFD replacement character is valid text and is
+// accepted; only malformed byte sequences mark the file as non-text.
+func readTextFile(path string) (string, error) {
+	data, err := os.ReadFile(path)
+	if err != nil {
+		return "", err
+	}
+
+	if !utf8.Valid(data) {
+		return "", ErrNotATextFile
+	}
+
+	return string(data), nil
+}