📦 deluan / flowllm

📄 documents.go · 62 lines
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
package pipelm

import (
	"context"
	"errors"
	"io"
)

// VectorStore is a particular type of database optimized for storing documents and their embeddings,
// and for fetching the documents most relevant to a particular query, i.e. those whose embeddings
// are most similar to the embedding of the query.
type VectorStore interface {
	// AddDocuments adds the given documents to the store.
	AddDocuments(context.Context, ...Document) error
	// SimilaritySearch returns the k most similar documents to the query, which is given as a string.
	SimilaritySearch(ctx context.Context, query string, k int) ([]Document, error)
	// SimilaritySearchVectorWithScore returns the k most similar documents to the query, along with
	// their similarity score. Here the query is a []float32 rather than a string
	// (presumably an already-computed embedding vector; confirm against implementations).
	SimilaritySearchVectorWithScore(ctx context.Context, query []float32, k int) ([]ScoredDocument, error)
}

// Document is the unit of content exchanged with a VectorStore and produced by a DocumentLoader.
type Document struct {
	// ID identifies the document (presumably unique within a store; not enforced in this file).
	ID          string
	// PageContent holds the document's content as a string.
	PageContent string
	// Metadata carries arbitrary key/value data attached to the document.
	Metadata    map[string]any
}

// ScoredDocument is a Document paired with a similarity score, as returned by
// VectorStore.SimilaritySearchVectorWithScore.
type ScoredDocument struct {
	Document
	// Score is the similarity score for the embedded Document.
	// NOTE(review): scale and direction (higher vs. lower is better) depend on the store; confirm.
	Score float32
}

// Splitter is an alias for a function that takes a string and returns a slice of strings,
// or an error (presumably breaking a text into smaller chunks; confirm against callers).
type Splitter = func(string) ([]string, error)

// DocumentLoader yields documents one at a time.
type DocumentLoader interface {
	// LoadNext returns the next Document or an error. LoadDocs treats an error
	// matching io.EOF as the normal end of the document stream.
	LoadNext(ctx context.Context) (Document, error)
}

// DocumentLoaderFunc is a function type that satisfies DocumentLoader through its
// LoadNext method, so a plain function can be used wherever a DocumentLoader is expected.
type DocumentLoaderFunc func(ctx context.Context) (Document, error)

// LoadNext implements DocumentLoader by calling the underlying function with ctx
// and passing its results through unchanged.
func (f DocumentLoaderFunc) LoadNext(ctx context.Context) (Document, error) {
	doc, err := f(ctx)
	return doc, err
}

// LoadDocs pulls documents from loader and collects them into a slice.
//
// Loading stops successfully when the loader reports an error matching io.EOF,
// or as soon as n documents have been collected. A non-positive n never hits the
// count limit, so in that case loading continues until io.EOF. Any other error
// aborts the load and is returned with a nil slice; documents gathered so far
// are discarded. The loader is always called with context.Background().
func LoadDocs(n int, loader DocumentLoader) ([]Document, error) {
	var (
		background = context.Background()
		loaded     []Document
	)
	for remaining := n; ; {
		next, loadErr := loader.LoadNext(background)
		switch {
		case errors.Is(loadErr, io.EOF):
			// End of stream: whatever was collected so far is the result.
			return loaded, nil
		case loadErr != nil:
			return nil, loadErr
		}
		loaded = append(loaded, next)
		// Decrement after appending so that n == 1 returns exactly one document.
		if remaining--; remaining == 0 {
			return loaded, nil
		}
	}
}