Implementing a simple RAG flow with LangChain and Ollama.
Prerequisite: an Ollama service must be up and running (an OpenAI-compatible service works just as well), with both a chat model and an embedding model available.
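Before wiring anything together, it helps to confirm that both models actually respond. Here is a minimal sanity check, assuming Ollama is listening on its default localhost:11434 and the two models used later (deepseek-r1:32b and nomic-embed-text) have already been pulled:

from langchain_ollama import OllamaLLM, OllamaEmbeddings

llm = OllamaLLM(model="deepseek-r1:32b")
embedding = OllamaEmbeddings(model="nomic-embed-text:latest")
print(llm.invoke("Say hi")[:80])                  # chat model returns text
print(len(embedding.embed_query("hello world")))  # embedding model returns a vector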
The process then comes down to the following steps, implemented in full in the script below:
1. Load the list of files in a folder.
2. Convert the text to embeddings.
3. Build the index.
4. Chat with, or ask questions against, the indexed content.
import os
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_ollama import OllamaLLM, OllamaEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

# Load the documents
root_dir = "D:/context_dir"
files = [os.path.join(root_dir, f) for f in os.listdir(root_dir) if f.endswith(".txt")]
files = [x.replace("\\", "/") for x in files]
loaders = [TextLoader(f, encoding="utf-8") for f in files]
docs = []
for loader in loaders:
    docs.extend(loader.load())

# Split the text into chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=150, chunk_overlap=20)
documents = text_splitter.split_documents(docs)

# Embed and index
embedding = OllamaEmbeddings(model="nomic-embed-text:latest")
vectorstore = Chroma.from_documents(documents, embedding, persist_directory="./vectorstore")

# Define the retriever and the prompt template
retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
template = """Answer the question based only on the following context:

{context}

Question: {question}"""
prompt = ChatPromptTemplate.from_template(template)
llm = OllamaLLM(model="deepseek-r1:32b")

# Join the retrieved documents into one context string
docs_api = lambda docs: "\n\n".join([d.page_content for d in docs])

# Build the RAG chain
# The dict fans the input out: the retriever fetches and formats context,
# while RunnablePassthrough() forwards the raw question into the prompt
chain = (
    {"context": retriever | docs_api, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

# Test query
query = "How many stars are there in the sky?"
response = chain.invoke(query)
print("RAG output:", response)
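One practical note: Chroma.from_documents re-embeds every chunk on every run. Since the store is persisted to ./vectorstore, later runs can reload the existing index instead. A sketch, assuming the same directory and the same embedding object as in the script above:

# Reload the persisted index instead of re-embedding (same embedding model required)
vectorstore = Chroma(persist_directory="./vectorstore", embedding_function=embedding)
retriever = vectorstore.as_retriever(search_kwargs={"k": 3})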
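Also, depending on the Ollama version, deepseek-r1 models may include their chain-of-thought inline in <think>...</think> tags. If the raw response contains them (an assumption about this particular setup, not guaranteed), a small regex strip cleans the final answer:

import re

# Remove the inline reasoning trace, if present, keeping only the answer
answer = re.sub(r"<think>.*?</think>", "", response, flags=re.DOTALL).strip()
print("Final answer:", answer)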