
RAG Pipeline

Shipfastai includes a production-ready Retrieval-Augmented Generation (RAG) pipeline.

Overview

RAG combines document retrieval with LLM generation:

1. Ingest - Process and embed documents

2. Retrieve - Find relevant chunks for a query

3. Generate - Use LLM to answer with context
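
Put together, the pipeline boils down to two calls against the helpers defined in the sections below (a rough sketch; chat() runs the retrieve step internally):

# Hypothetical end-to-end flow, reusing ingest_document() and chat() from the sections below.
await ingest_document(file, user_id)                               # 1. Ingest
answer = await chat("What does the refund policy say?", user_id)   # 2 + 3. Retrieve + Generate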

Document Ingestion

Upload and Process

backend/app/services/rag.py

from fastapi import UploadFile

async def ingest_document(file: UploadFile, user_id: str):
    # 1. Extract text from the uploaded file
    text = await extract_text(file)

    # 2. Split into overlapping chunks
    chunks = text_splitter.split_text(text)

    # 3. Generate one embedding per chunk
    embeddings = await get_embeddings(chunks)

    # 4. Store each chunk and its embedding in Supabase
    for i, (chunk, embedding) in enumerate(zip(chunks, embeddings)):
        await supabase.table('documents').insert({
            'user_id': user_id,
            'content': chunk,
            'embedding': embedding,
            'metadata': {'source': file.filename, 'chunk': i}
        }).execute()
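
For reference, here is a minimal sketch of how ingest_document might be wired into an upload route. The route path, router object, and get_current_user dependency are assumptions for illustration, not part of the code above:

# Hypothetical FastAPI route exposing ingest_document as an upload endpoint.
from fastapi import APIRouter, Depends, UploadFile

router = APIRouter()

@router.post("/api/documents")
async def upload_document(file: UploadFile, user=Depends(get_current_user)):
    # get_current_user is an assumed auth dependency returning the current user
    await ingest_document(file, user.id)
    return {"status": "ingested", "filename": file.filename}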

Text Splitter Configuration

from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    separators=["\n\n", "\n", ". ", " "]
)
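
The ingestion and retrieval code on this page also calls get_embeddings / get_embedding, which are not shown above. A minimal sketch using the OpenAI embeddings API; the model name is an assumption, and whatever model you use must return 1536-dimensional vectors to match the vector(1536) column defined below:

# Assumed embedding helpers; text-embedding-3-small returns 1536-dimensional vectors.
from openai import AsyncOpenAI

openai_client = AsyncOpenAI()  # reads OPENAI_API_KEY from the environment

async def get_embeddings(texts: list[str]) -> list[list[float]]:
    response = await openai_client.embeddings.create(
        model="text-embedding-3-small",
        input=texts,
    )
    return [item.embedding for item in response.data]

async def get_embedding(text: str) -> list[float]:
    return (await get_embeddings([text]))[0]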

Retrieval

Vector Search

async def retrieve_context(query: str, user_id: str, k: int = 5):
    # 1. Embed the query with the same model used at ingestion time
    query_embedding = await get_embedding(query)

    # 2. Search for the k most similar chunks belonging to this user
    results = await supabase.rpc('match_documents', {
        'query_embedding': query_embedding,
        'match_count': k,
        'filter': {'user_id': user_id}
    }).execute()

    return results.data
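
Each row mirrors the RETURNS TABLE clause of the match_documents function below: a content string plus a similarity score. A quick sanity-check sketch (the query string is made up; run it inside an async context with an existing user_id):

# Hypothetical usage: inspect what the retriever returns for a query.
rows = await retrieve_context("How do refunds work?", user_id)
for row in rows:
    print(f"{row['similarity']:.3f}  {row['content'][:80]}")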

Supabase Vector Search Function

CREATE FUNCTION match_documents(
  query_embedding vector(1536),
  match_count int,
  filter jsonb DEFAULT '{}'
) RETURNS TABLE (content text, similarity float)
AS $$
  SELECT content, 1 - (embedding <=> query_embedding) AS similarity
  FROM documents
  WHERE user_id = (filter->>'user_id')::uuid
  ORDER BY embedding <=> query_embedding
  LIMIT match_count;
$$ LANGUAGE sql;

Generation

Chat with Context

async def chat(query: str, user_id: str):
    # 1. Retrieve relevant chunks and flatten them into plain text
    rows = await retrieve_context(query, user_id)
    context = "\n\n".join(row['content'] for row in rows)

    # 2. Build prompt
    prompt = f"""Answer based on the following context:

{context}

Question: {query}

Answer:"""

    # 3. Generate response (openai is an AsyncOpenAI client instance)
    response = await openai.chat.completions.create(
        model="gpt-4-turbo",
        messages=[{"role": "user", "content": prompt}]
    )

    return response.choices[0].message.content
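
The frontend below calls an /api/chat route. A minimal sketch of what that endpoint could look like on the FastAPI side; the request model, router, and get_current_user dependency are assumptions chosen to match the fetch call below:

# Hypothetical /api/chat endpoint exposing chat() to the frontend.
from fastapi import APIRouter, Depends
from pydantic import BaseModel

router = APIRouter()

class ChatRequest(BaseModel):
    query: str

@router.post("/api/chat")
async def chat_endpoint(body: ChatRequest, user=Depends(get_current_user)):
    answer = await chat(body.query, user.id)
    return {"answer": answer}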

Frontend Integration

import { useQuery } from '@tanstack/react-query';

const { data, isLoading } = useQuery({
  queryKey: ['chat', query],
  queryFn: () => fetch('/api/chat', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ query }),
  }).then(r => r.json()),
});