package pipelm
import (
"context"
"errors"
"io"
)
// VectorStore is a database optimized for storing documents together with their
// embeddings, and for fetching the documents most relevant to a query, i.e. those
// whose embeddings are most similar to the embedding of the query.
type VectorStore interface {
	// AddDocuments adds the given documents to the store.
	AddDocuments(context.Context, ...Document) error
	// SimilaritySearch returns the k documents most similar to the text query.
	SimilaritySearch(ctx context.Context, query string, k int) ([]Document, error)
	// SimilaritySearchVectorWithScore returns the k documents most similar to the
	// query, each paired with its similarity score. Here the query is supplied as
	// a []float32 vector (presumably an already-computed embedding) rather than as text.
	SimilaritySearchVectorWithScore(ctx context.Context, query []float32, k int) ([]ScoredDocument, error)
}
// Document is the unit of content that a VectorStore stores and returns from its
// searches, and that a DocumentLoader produces.
type Document struct {
	// ID identifies the document (presumably unique within a store — confirm with implementations).
	ID string
	// PageContent holds the document's content as a string.
	PageContent string
	// Metadata carries arbitrary key/value data attached to the document.
	Metadata map[string]any
}
// ScoredDocument pairs a Document with a score; it is the element type returned by
// VectorStore.SimilaritySearchVectorWithScore.
type ScoredDocument struct {
	// Document is embedded, so its fields are promoted onto ScoredDocument.
	Document
	// Score is the similarity score reported for the document. Its scale and
	// direction are not defined here — NOTE(review): confirm per implementation.
	Score float32
}
// Splitter is an alias for a function that takes a single string and returns a
// slice of strings or an error. Presumably it splits text into smaller chunks —
// confirm against its callers.
type Splitter = func(string) ([]string, error)
// DocumentLoader yields documents one at a time.
type DocumentLoader interface {
	// LoadNext returns the next document. LoadDocs treats an error matching io.EOF
	// as the signal that no documents remain, so implementations should return
	// io.EOF once they are exhausted.
	LoadNext(ctx context.Context) (Document, error)
}
// DocumentLoaderFunc is a function type with the same signature as
// DocumentLoader.LoadNext. Its LoadNext method lets an ordinary function be used
// wherever a DocumentLoader is expected.
type DocumentLoaderFunc func(ctx context.Context) (Document, error)

// LoadNext calls the underlying function with ctx and passes its results back
// to the caller unchanged.
func (f DocumentLoaderFunc) LoadNext(ctx context.Context) (Document, error) {
	doc, err := f(ctx)
	return doc, err
}
// LoadDocs pulls documents from loader, one LoadNext call at a time, and
// collects them into a slice.
//
// Loading stops when either n documents have been collected or LoadNext returns
// an error matching io.EOF; in both cases the documents gathered so far are
// returned with a nil error. If n is zero or negative the count limit never
// triggers, so loading continues until io.EOF. Any other error aborts the load
// and is returned as-is together with a nil slice.
//
// Every LoadNext call receives context.Background().
func LoadDocs(n int, loader DocumentLoader) ([]Document, error) {
	var (
		loaded    []Document
		remaining = n
		bg        = context.Background()
	)
	for {
		next, err := loader.LoadNext(bg)
		switch {
		case errors.Is(err, io.EOF):
			// Running out of input is a normal way to finish, not a failure.
			return loaded, nil
		case err != nil:
			return nil, err
		}
		loaded = append(loaded, next)
		// Decrement first, then compare: a starting value <= 0 never reaches zero.
		if remaining--; remaining == 0 {
			return loaded, nil
		}
	}
}