
# NOTE: if `pip install ...` does not work, use `python3 -m pip install ...` instead.

#pip install langchain_ollama
#pip install faiss_cpu
# Last year's code was changed to keep up with updates in LangChain.
# from langchain.document_loaders import PyPDFLoader,DirectoryLoader - outdated
# from langchain.embeddings import HuggingFaceEmbeddings -outdated
# from langchain.vectorstores import faiss_cpu - outdated  
# from langchain_community.embeddings import HuggingFaceEmbeddings
# from langchain_huggingface import HuggingFaceEmbeddings
#from langchain_community.embeddings import SentenceTransformerEmbeddings

from langchain_community.document_loaders import PyPDFLoader,DirectoryLoader
from langchain_ollama import OllamaEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter 
from langchain_community.vectorstores.faiss import FAISS

print("1")  # debug progress marker: imports finished
# Directory passed to the loader as the location of the source PDF files.
DATA_PATH: str = "data/"
# Path the FAISS index is saved to.
DB_FAISS_PATH: str = "vectorstore/db_faiss"

print("2")  # debug progress marker: path constants defined
#Create vector store
def create_vector_db():
    """Build a FAISS vector store from the PDFs in ``DATA_PATH`` and save it.

    Loads every file matching ``*.pdf`` in ``DATA_PATH`` with ``PyPDFLoader``,
    splits the loaded documents into chunks (``chunk_size=500``,
    ``chunk_overlap=50``), embeds the chunks with the Ollama
    ``nomic-embed-text`` model and saves the resulting FAISS index to
    ``DB_FAISS_PATH``.

    Raises:
        ValueError: If no PDF documents were loaded from ``DATA_PATH``, or if
            splitting the loaded documents produced no text chunks.
    """
    print("3")
    loader = DirectoryLoader(DATA_PATH, glob='*.pdf', loader_cls=PyPDFLoader)
    documents = loader.load()
    if not documents:
        # Fail early with a clear message instead of an opaque error from
        # building an index out of an empty list further down.
        raise ValueError(f"No PDF documents were loaded from {DATA_PATH!r}")

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    texts = text_splitter.split_documents(documents)
    if not texts:
        # E.g. PDFs without extractable text; an index cannot be built from nothing.
        raise ValueError(
            f"No text chunks were produced from the PDFs in {DATA_PATH!r}"
        )
    print("4")

    # Embedding back-ends tried previously, kept for reference:
    #embeddings = HuggingFaceEmbeddings(model_name = 'sentence-transformers/all-MiniLM-L6-v2',
        #model_kwargs = {'device':'cpu'})
    #embeddings = OllamaEmbeddings(base_url="http://localhost:11434",model="llama3",show_progress=True)
    #embeddings = SentenceTransformerEmbeddings(model_name = "all-MiniLM-L6-v2")
    #embeddings = OllamaEmbeddings(model = "llama3")
    #embeddings = OllamaEmbeddings(model = "mxbai-embed-large")

    # NOTE(review): presumably needs a reachable Ollama server with the
    # "nomic-embed-text" model available - confirm for the target environment.
    embeddings = OllamaEmbeddings(model="nomic-embed-text")

    db = FAISS.from_documents(texts, embeddings)
    db.save_local(DB_FAISS_PATH)
    print("5")

# Build the vector store only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    create_vector_db()