import os
from flask import Flask, request, render_template, jsonify, Response
from llama_index.core import StorageContext, VectorStoreIndex, Settings, Document, SimpleDirectoryReader
from llama_index.llms.ollama import Ollama
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.vector_stores.elasticsearch import ElasticsearchStore
from elasticsearch import AsyncElasticsearch
from llama_index.readers.file import (
DocxReader, HWPReader, PDFReader, EpubReader, FlatReader,
HTMLTagReader, ImageReader, IPYNBReader, MarkdownReader,
MboxReader, PptxReader, PandasCSVReader, PyMuPDFReader,
XMLReader, PagedCSVReader, CSVReader
)
app = Flask(__name__)
# 配置类
class Ai():
base_url = "http://localhost:11434"
es_url = "http://localhost:9200"
es_index = "spring-ai-index-test"
model = "qwen2.5:7b"
embedding = "bge-m3"
upload_path= "./uploads"
# 确保上传目录存在
os.makedirs(Ai.upload_path, exist_ok=True)
# 解析器映射
PARSERS = {
".pdf": PDFReader(),
".docx": DocxReader(),
".hwp": HWPReader(),
".epub": EpubReader(),
".txt": FlatReader(),
".html": HTMLTagReader(),
".jpg": ImageReader(), ".jpeg": ImageReader(), ".png": ImageReader(),
".ipynb": IPYNBReader(),
".md": MarkdownReader(),
".mbox": MboxReader(),
".csv": PandasCSVReader(),
".xml": XMLReader()
}
# 全局模型配置
Settings.embed_model = OllamaEmbedding(model_name=Ai.embedding, base_url=Ai.base_url)
Settings.llm = Ollama(model=Ai.model, base_url=Ai.base_url)
# 同步策略
es_client = AsyncElasticsearch(Ai.es_url, request_timeout=60)
dense_vector_store = ElasticsearchStore(
es_client = es_client,
index_name = Ai.es_index
)
#新增文件入口
@app.route('/add')
def upload_form():
return render_template('upload.html')
#入口
@app.route('/')
def chat_html():
return render_template('chat.html')
#聊天入口
@app.route('/chat')
def chat():
q = request.args.get('q')
if not q:
return jsonify({"error": "No query provided"}), 400
return search(q)
#上传文件方法
@app.route('/upload', methods=['POST'])
def upload_file():
try:
if 'file' not in request.files:
return jsonify({"error": "No file part"}), 400
file = request.files['file']
# 获取文件扩展名
ext = os.path.splitext(file.filename)[-1].lower()
if ext not in PARSERS:
return jsonify({"error": "Unsupported file type"}), 400
# 保存文件
file_path = os.path.join(Ai.upload_path, file.filename)
file.save(file_path)
# 解析文件
parser = PARSERS[ext]
file_extractor = {ext: parser}
documents = SimpleDirectoryReader(
Ai.upload_path, file_extractor=file_extractor
).load_data()
if not documents:
return jsonify({"error": "No content extracted from file"}), 400
# 确保 documents 是一个列表
if not isinstance(documents, list):
documents = [documents]
# 确保文档类型是 Document ,如果是字符串需要手动包装
documents = [doc if isinstance(doc, Document) else Document(doc) for doc in documents]
# 存储到索引
storage_context = StorageContext.from_defaults(vector_store=dense_vector_store)
index = VectorStoreIndex.from_documents(
documents,
storage_context=storage_context
)
# 删除本地上传的文件
os.remove(file_path)
return jsonify({
"message": "File processed and stored successfully",
"filename": file.filename
}), 200
except UnicodeDecodeError:
return jsonify({"error": "File must be text encoded in UTF-8"}), 400
except Exception as e:
return jsonify({"error": str(e)}), 500
# 查询文档
def search(query: str):
try:
storage_context = StorageContext.from_defaults(vector_store=dense_vector_store)
index = VectorStoreIndex.from_vector_store(
vector_store=dense_vector_store,
storage_context=storage_context
)
query_engine = index.as_query_engine()
# 直接调用同步 query 方法
response = query_engine.query(query)
return str(response)
except Exception as e:
return f"Error: {str(e)}"
if __name__ == '__main__':
app.run(host='0.0.0.0', port=5000, debug=True)
第一次对话正常,第二次就报错 Error: Timeout context manager should be used inside a task
![]() |
1
hellojay 1 天前
这么多代码还涉及这么多库,建议找个 ai 问问啊
|