Ollama Integration
Use ContextFS with Ollama for local AI with persistent memory
Prerequisites
1. Ollama - Install from ollama.ai
2. Python 3.9+ - Required for ContextFS
3. A local model - e.g., ollama pull llama3
Installation
1. Install ContextFS
pip install contextfs
2. Pull an Ollama model
ollama pull llama3
Integration Options
Option 1: Python API Integration
Combine Ollama's API with ContextFS for memory-augmented local AI:
import ollama
from contextfs import ContextFS

# Initialize ContextFS
ctx = ContextFS()

def chat_with_memory(user_message: str):
    # Search for relevant context
    memories = ctx.search(user_message, limit=5)
    context = "\n".join([m.content for m in memories])

    # Build prompt with context
    system_prompt = f"""You are a helpful assistant with access to memory.

Relevant context from previous conversations:
{context}

Use this context when relevant to answer the user's question."""

    # Chat with Ollama
    response = ollama.chat(
        model='llama3',
        messages=[
            {'role': 'system', 'content': system_prompt},
            {'role': 'user', 'content': user_message}
        ]
    )
    return response['message']['content']

# Example usage
answer = chat_with_memory("What patterns do we use for authentication?")
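To let the assistant learn from each exchange, you can write the question and answer back with ctx.save. This is a minimal sketch that continues the example above and reuses the content, memory_type, and summary parameters shown in the RAG example later in this guide:
# Sketch: persist the exchange so future searches can retrieve it.
# Reuses the content / memory_type / summary parameters shown elsewhere in this guide.
question = "What patterns do we use for authentication?"
answer = chat_with_memory(question)

ctx.save(
    content=f"Q: {question}\nA: {answer}",
    memory_type="episodic",
    summary=f"Conversation about: {question[:50]}"
)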
Option 2: Tool Calling (with compatible models)
For models that support function calling:
import ollama
from contextfs import ContextFS

ctx = ContextFS()

# Define ContextFS tools
tools = [
    {
        'type': 'function',
        'function': {
            'name': 'search_memories',
            'description': 'Search for relevant memories and context',
            'parameters': {
                'type': 'object',
                'properties': {
                    'query': {'type': 'string', 'description': 'Search query'}
                },
                'required': ['query']
            }
        }
    },
    {
        'type': 'function',
        'function': {
            'name': 'save_memory',
            'description': 'Save important information to memory',
            'parameters': {
                'type': 'object',
                'properties': {
                    'content': {'type': 'string'},
                    'type': {'type': 'string', 'enum': ['fact', 'decision', 'code']},
                    'summary': {'type': 'string'}
                },
                'required': ['content', 'type']
            }
        }
    }
]

# Chat with tool support
response = ollama.chat(
    model='llama3.1',  # Models with tool support
    messages=[{'role': 'user', 'content': 'Search for API patterns'}],
    tools=tools
)
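The model's reply may contain tool calls that your code has to execute and return before asking for a final answer. The dispatch loop below is a sketch rather than part of the ContextFS API; it assumes the ollama Python client reports tool calls as a function name plus an arguments dict, and it maps each call onto ctx.search or ctx.save:
# Sketch of a dispatch loop (not part of the ContextFS API). Assumes the ollama
# Python client exposes tool calls as {'function': {'name': ..., 'arguments': {...}}}
# and supports the dict-style access used elsewhere in this guide.
messages = [{'role': 'user', 'content': 'Search for API patterns'}]
message = response['message']
messages.append(message)

for call in message.get('tool_calls') or []:
    name = call['function']['name']
    args = call['function']['arguments']

    if name == 'search_memories':
        results = ctx.search(args['query'], limit=5)
        result_text = "\n".join(m.content for m in results)
    elif name == 'save_memory':
        ctx.save(
            content=args['content'],
            memory_type=args['type'],
            summary=args.get('summary', args['content'][:50]),
        )
        result_text = "Memory saved."
    else:
        result_text = f"Unknown tool: {name}"

    # Hand the tool result back to the model
    messages.append({'role': 'tool', 'content': result_text})

# Ask the model for a final answer that incorporates the tool results
final = ollama.chat(model='llama3.1', messages=messages)
print(final['message']['content'])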
RAG with Ollama + ContextFS
Build a complete RAG system with local models:
from contextfs import ContextFS
import ollama

ctx = ContextFS()

def rag_query(question: str):
    # 1. Retrieve relevant memories
    memories = ctx.search(question, limit=10)

    # 2. Format context
    context_text = "\n\n".join([
        f"[{m.memory_type}] {m.summary}\n{m.content}"
        for m in memories
    ])

    # 3. Generate answer with context
    prompt = f"""Based on the following context, answer the question.
If the context doesn't contain relevant information, say so.

Context:
{context_text}

Question: {question}

Answer:"""

    response = ollama.generate(model='llama3', prompt=prompt)
    return response['response']

# Save the interaction to memory
def save_qa(question: str, answer: str):
    ctx.save(
        content=f"Q: {question}\nA: {answer}",
        memory_type="episodic",
        summary=f"Q&A about: {question[:50]}..."
    )
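Used together, the two functions form a retrieve, answer, remember loop. The question below is only an illustration:
# Illustrative usage: answer a question from memory, then remember the exchange
question = "How do we handle token refresh?"   # example question, not from your data
answer = rag_query(question)
print(answer)
save_qa(question, answer)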
Configuration
Environment variables for ContextFS with Ollama:
# .env file
CONTEXTFS_DATA_DIR=~/.contextfs
CONTEXTFS_EMBEDDING_MODEL=all-MiniLM-L6-v2

# For using Ollama as embedding model
CONTEXTFS_EMBEDDING_BACKEND=ollama
OLLAMA_EMBEDDING_MODEL=nomic-embed-text

# Ollama server (if not default)
OLLAMA_HOST=http://localhost:11434
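When switching the embedding backend to Ollama, pull the embedding model first (ollama pull nomic-embed-text). The snippet below is an illustrative check, independent of ContextFS, that the model responds to embedding requests:
# Illustrative check that the embedding model is available to Ollama.
# Run `ollama pull nomic-embed-text` first if the model is missing.
import ollama

result = ollama.embeddings(model='nomic-embed-text', prompt='hello world')
print(len(result['embedding']))  # prints the embedding dimension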
Complete Example
A complete local AI assistant with memory:
#!/usr/bin/env python3
"""Local AI assistant with ContextFS memory."""
import ollama
from contextfs import ContextFS

def main():
    ctx = ContextFS()
    print("Local AI Assistant with Memory")
    print("Type 'quit' to exit, '/save <text>' to save memory")
    print("-" * 40)

    while True:
        user_input = input("\nYou: ").strip()

        if user_input.lower() == 'quit':
            break

        if user_input.startswith('/save '):
            content = user_input[6:]
            ctx.save(content=content, memory_type="fact")
            print("Memory saved!")
            continue

        # Search for context
        memories = ctx.search(user_input, limit=3)
        context = "\n".join([f"- {m.content}" for m in memories])

        # Build prompt
        messages = [
            {'role': 'system', 'content': f"Context:\n{context}"},
            {'role': 'user', 'content': user_input}
        ]

        # Get response
        response = ollama.chat(model='llama3', messages=messages)
        print(f"\nAssistant: {response['message']['content']}")

if __name__ == '__main__':
    main()
Next Steps
Build powerful local AI with persistent memory:
- Index your codebase: contextfs index
- Experiment with different Ollama models
- Enable cloud sync to share memories across devices
- Build custom RAG pipelines for your use case