Add full monorepo: virtual-banker, backend, frontend, docs, scripts, deployment
Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
110
backend/rag/service.go
Normal file
110
backend/rag/service.go
Normal file
@@ -0,0 +1,110 @@
|
||||
package rag
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
"github.com/jackc/pgx/v5/pgxpool"
|
||||
)
|
||||
|
||||
// Service provides RAG (Retrieval-Augmented Generation) functionality
|
||||
type Service interface {
|
||||
Retrieve(ctx context.Context, query string, tenantID string, topK int) ([]RetrievedDoc, error)
|
||||
Ingest(ctx context.Context, doc *Document) error
|
||||
}
|
||||
|
||||
// RetrievedDoc is a single search hit returned by Service.Retrieve.
type RetrievedDoc struct {
	// ID is the knowledge-base identifier of the document.
	ID string
	// Title is the stored document title.
	Title string
	// Content is the stored document body.
	Content string
	// URL is taken from the "url" entry of the stored metadata; it is left
	// empty when that entry is absent.
	URL string
	// Score is the relevance rank computed by the retrieval query; results
	// are ordered by it, highest first.
	Score float64
}
|
||||
|
||||
// Document is the input to Service.Ingest: one knowledge-base entry together
// with the tenant that owns it.
type Document struct {
	// ID identifies the entry; ingesting the same ID again updates the
	// existing row instead of inserting a new one.
	ID string
	// TenantID is the tenant the entry is stored under.
	TenantID string
	// Title is the document title.
	Title string
	// Content is the document body.
	Content string
	// URL is persisted as the "url" entry of the stored metadata.
	URL string
	// Metadata holds additional key/value pairs stored alongside the
	// document. A "url" key here takes precedence over the URL field.
	Metadata map[string]any
}
|
||||
|
||||
// RAGService implements RAG using pgvector
|
||||
type RAGService struct {
|
||||
db *pgxpool.Pool
|
||||
}
|
||||
|
||||
// NewRAGService creates a new RAG service
|
||||
func NewRAGService(db *pgxpool.Pool) *RAGService {
|
||||
return &RAGService{
|
||||
db: db,
|
||||
}
|
||||
}
|
||||
|
||||
// Retrieve retrieves relevant documents
|
||||
func (s *RAGService) Retrieve(ctx context.Context, query string, tenantID string, topK int) ([]RetrievedDoc, error) {
|
||||
if topK <= 0 {
|
||||
topK = 5
|
||||
}
|
||||
|
||||
// TODO: Generate embedding for query
|
||||
// For now, use simple text search
|
||||
querySQL := `
|
||||
SELECT id, title, content, metadata->>'url' as url,
|
||||
ts_rank(to_tsvector('english', content), plainto_tsquery('english', $1)) as score
|
||||
FROM knowledge_base
|
||||
WHERE tenant_id = $2
|
||||
ORDER BY score DESC
|
||||
LIMIT $3
|
||||
`
|
||||
|
||||
rows, err := s.db.Query(ctx, querySQL, query, tenantID, topK)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to query: %w", err)
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
var docs []RetrievedDoc
|
||||
for rows.Next() {
|
||||
var doc RetrievedDoc
|
||||
var url *string
|
||||
if err := rows.Scan(&doc.ID, &doc.Title, &doc.Content, &url, &doc.Score); err != nil {
|
||||
continue
|
||||
}
|
||||
if url != nil {
|
||||
doc.URL = *url
|
||||
}
|
||||
docs = append(docs, doc)
|
||||
}
|
||||
|
||||
return docs, nil
|
||||
}
|
||||
|
||||
// Ingest ingests a document into the knowledge base
|
||||
func (s *RAGService) Ingest(ctx context.Context, doc *Document) error {
|
||||
// TODO: Generate embedding for document content
|
||||
// For now, just insert without embedding
|
||||
query := `
|
||||
INSERT INTO knowledge_base (id, tenant_id, title, content, metadata)
|
||||
VALUES ($1, $2, $3, $4, $5)
|
||||
ON CONFLICT (id) DO UPDATE SET
|
||||
title = $3,
|
||||
content = $4,
|
||||
metadata = $5,
|
||||
updated_at = NOW()
|
||||
`
|
||||
|
||||
metadata := map[string]interface{}{
|
||||
"url": doc.URL,
|
||||
}
|
||||
for k, v := range doc.Metadata {
|
||||
metadata[k] = v
|
||||
}
|
||||
|
||||
_, err := s.db.Exec(ctx, query, doc.ID, doc.TenantID, doc.Title, doc.Content, metadata)
|
||||
return err
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user