# RAG Pipeline
Shipfastai includes a production-ready Retrieval-Augmented Generation (RAG) pipeline.
## Overview

RAG combines document retrieval with LLM generation:

1. **Ingest** - process and embed documents
2. **Retrieve** - find the chunks most relevant to a query
3. **Generate** - use the LLM to answer with the retrieved context
## Document Ingestion

### Upload and Process
```python
# backend/app/services/rag.py
from fastapi import UploadFile

async def ingest_document(file: UploadFile, user_id: str):
    # 1. Extract text from the upload
    text = await extract_text(file)

    # 2. Split into overlapping chunks
    chunks = text_splitter.split_text(text)

    # 3. Generate one embedding per chunk
    embeddings = await get_embeddings(chunks)

    # 4. Store chunks and embeddings in Supabase in a single batched insert
    rows = [
        {
            'user_id': user_id,
            'content': chunk,
            'embedding': embedding,
            'metadata': {'source': file.filename, 'chunk': i},
        }
        for i, (chunk, embedding) in enumerate(zip(chunks, embeddings))
    ]
    await supabase.table('documents').insert(rows).execute()
```
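The `extract_text` and `get_embeddings` helpers are referenced above but not shown. A minimal sketch, assuming plain-text and PDF uploads handled via `pypdf` and OpenAI's `text-embedding-3-small` model (1536 dimensions, which matches the `vector(1536)` column used below); swap in whatever extractor and embedding model your project actually uses:

```python
import io

from fastapi import UploadFile
from openai import AsyncOpenAI
from pypdf import PdfReader

openai_client = AsyncOpenAI()  # reads OPENAI_API_KEY from the environment

async def extract_text(file: UploadFile) -> str:
    # Hypothetical extractor: PDFs via pypdf, everything else decoded as
    # UTF-8 text. Use a richer library (e.g. unstructured) for more formats.
    raw = await file.read()
    if file.filename and file.filename.lower().endswith(".pdf"):
        reader = PdfReader(io.BytesIO(raw))
        return "\n".join(page.extract_text() or "" for page in reader.pages)
    return raw.decode("utf-8", errors="ignore")

async def get_embeddings(texts: list[str]) -> list[list[float]]:
    # One batched API call; results come back in input order
    response = await openai_client.embeddings.create(
        model="text-embedding-3-small",  # 1536 dims, matches vector(1536)
        input=texts,
    )
    return [item.embedding for item in response.data]

async def get_embedding(text: str) -> list[float]:
    return (await get_embeddings([text]))[0]
```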
### Text Splitter Configuration
```python
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,    # max characters per chunk
    chunk_overlap=200,  # shared context between adjacent chunks
    separators=["\n\n", "\n", ". ", " "],  # preferred break points, in order
)
```
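A quick illustration of the overlap behavior (a hypothetical snippet, not part of the kit): chunks cap out near `chunk_size` characters, and adjacent chunks repeat up to `chunk_overlap` characters so sentences that straddle a boundary survive retrieval:

```python
sample = "\n\n".join(
    f"Section {i}. " + "Lorem ipsum dolor sit amet. " * 30 for i in range(4)
)
chunks = text_splitter.split_text(sample)

for i, chunk in enumerate(chunks):
    # Lengths stay at or below ~1000 characters; the tail of chunk i
    # reappears at the head of chunk i+1 because of the 200-char overlap.
    print(i, len(chunk), repr(chunk[:50]))
```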
## Retrieval

### Vector Search
```python
async def retrieve_context(query: str, user_id: str, k: int = 5):
    # 1. Embed the query with the same model used at ingestion time
    query_embedding = await get_embedding(query)

    # 2. Call the match_documents RPC to find the k nearest chunks
    results = await supabase.rpc('match_documents', {
        'query_embedding': query_embedding,
        'match_count': k,
        'filter': {'user_id': user_id},
    }).execute()
    return results.data
```
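Each row returned by the RPC carries the columns declared by `match_documents` (defined next): `content` and `similarity`. A small hypothetical helper for inspecting matches before wiring them into generation:

```python
async def debug_matches(user_id: str) -> None:
    # Print the top matches for a sample question
    chunks = await retrieve_context("How do I reset my password?", user_id)
    for chunk in chunks:
        # similarity = 1 - cosine distance, so higher means a closer match
        print(f"{chunk['similarity']:.3f}  {chunk['content'][:80]}")
```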
### Supabase Vector Search Function
```sql
CREATE OR REPLACE FUNCTION match_documents(
  query_embedding vector(1536),
  match_count int,
  filter jsonb DEFAULT '{}'
) RETURNS TABLE (content text, similarity float)
LANGUAGE sql STABLE
AS $$
  SELECT
    documents.content,
    1 - (documents.embedding <=> query_embedding) AS similarity
  FROM documents
  WHERE documents.user_id = (filter->>'user_id')::uuid
  ORDER BY documents.embedding <=> query_embedding
  LIMIT match_count;
$$;
```
## Generation

### Chat with Context
```python
async def chat(query: str, user_id: str):
    # 1. Retrieve the most relevant chunks for this user
    chunks = await retrieve_context(query, user_id)
    context = "\n\n".join(chunk['content'] for chunk in chunks)

    # 2. Build the prompt around the retrieved context
    prompt = f"""Answer based on the following context:

{context}

Question: {query}
Answer:"""

    # 3. Generate the response (`openai` is an AsyncOpenAI client instance)
    response = await openai.chat.completions.create(
        model="gpt-4-turbo",
        messages=[{"role": "user", "content": prompt}],
    )
    return response.choices[0].message.content
```
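The frontend snippet in the next section posts to `/api/chat`. A minimal FastAPI route sketch wiring that path to `chat()`; the route path, request model, and auth dependency here are assumptions for illustration, not the kit's documented API:

```python
from fastapi import APIRouter, Depends
from pydantic import BaseModel

router = APIRouter()

class ChatRequest(BaseModel):
    query: str

@router.post("/api/chat")
async def chat_endpoint(
    body: ChatRequest,
    user_id: str = Depends(get_current_user_id),  # hypothetical auth dependency
):
    # Delegate to the RAG chat function defined above
    answer = await chat(body.query, user_id)
    return {"answer": answer}
```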
## Frontend Integration
Call the chat endpoint from the client with TanStack Query's `useQuery`:

```typescript
const { data, isLoading } = useQuery({
  queryKey: ['chat', query],
  queryFn: () =>
    fetch('/api/chat', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ query }),
    }).then((r) => r.json()),
  enabled: query.length > 0, // skip the request until there is a query
});
```