Table of Contents

AI - LangChain

see also:

Introduction

There are 3 main concepts

Python coding example

# Load settings from the nearest .env file into the process environment
# (presumably the OpenAI API key lives there -- confirm against your .env).
# find_dotenv must be *called*: it returns the path that load_dotenv reads.
from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv())

# Completion-style LLM wrapper: takes a plain string prompt.
from langchain.llms import OpenAI
llm = OpenAI(model_name="text-davinci-003")
# The return value is only echoed in a notebook/REPL; wrap in print() in a script.
llm("explain large language models in one sentence")

from langchain.schema import (
    AIMessage,
    HumanMessage,
    SystemMessage,
)

from langchain.chat_models import ChatOpenAI

# Chat models take a list of role-tagged messages rather than one prompt string.
chat = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.3)
messages = [
    SystemMessage(content="You are an expert data scientist"),
    HumanMessage(content="Write a Python script that trains a neural network on data"),
]
response = chat(messages)
# Print the reply to confirm everything works so far.
print(response.content)

# Dynamic prompts: a template with a named placeholder that is filled in per call.

from langchain import PromptTemplate

# The prompt text begins and ends with a newline; {concept} is the placeholder.
template = (
    "\n"
    "You are an expert data scientist with an expertise in building deep learning models.\n"
    "Explain the concept of {concept} in a couple of lines\n"
)

prompt = PromptTemplate(template=template, input_variables=["concept"])

# Fill in the placeholder, then hand the finished prompt text to the LLM.
filled_prompt = prompt.format(concept="regularization")
llm(filled_prompt)

# Chains: pair an LLM with a prompt template so it can be run from inputs alone.

from langchain.chains import LLMChain, SimpleSequentialChain

chain = LLMChain(prompt=prompt, llm=llm)

# Only the template's input variable has to be supplied when running the chain.
first_answer = chain.run("regularization")
print(first_answer)

# Second step: a prompt meant to receive the first chain's output as its input.
second_prompt = PromptTemplate(
    template="Turn the concept description of {ml_concept} and explain it to me like I'm five in 500 words",
    input_variables=["ml_concept"],
)
chain_two = LLMChain(prompt=second_prompt, llm=llm)

# Combine both steps into one sequential chain.
steps = [chain, chain_two]
overall_chain = SimpleSequentialChain(chains=steps, verbose=True)

# Run the combined chain and show the final explanation.
explanation = overall_chain.run("regularization")
print(explanation)
# Split the explanation into chunks so they can be embedded and later stored
# in a Pinecone vector datastore.
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=0)
texts = text_splitter.create_documents([explanation])

# Inspect the first chunk's text (echoed in a notebook/REPL only).
first_chunk = texts[0]
first_chunk.page_content

# Embedding model used to turn text into vectors.
embeddings = OpenAIEmbeddings(model_name="ada")

# Embed the first chunk to look at the resulting vector.
query_result = embeddings.embed_query(first_chunk.page_content)
query_result

# Store the embedded chunks in Pinecone.
import os
import pinecone
from langchain.vectorstores import Pinecone  # module is `vectorstores` (no underscore)

# Initialize the Pinecone client from environment variables
# (os.getenv yields None for a variable that is not set).
pinecone.init(
    api_key=os.getenv("PINECONE_API_KEY"),
    environment=os.getenv("PINECONE_ENV"),
)

# Embed the documents and store them under this index name.
index_name = "langchain-quickstart"
search = Pinecone.from_documents(texts, embeddings, index_name=index_name)

# Query the store with a natural-language question.
query = "What is magical about a regularizer?"
result = search.similarity_search(query)
# Agents: let an LLM write and run Python code to answer natural-language questions.
from langchain.agents.agent_toolkits import create_python_agent
from langchain.tools.python.tool import PythonREPLTool
from langchain.python import PythonREPL
from langchain.llms.openai import OpenAI

# Build the pieces separately, then assemble the agent from them.
agent_llm = OpenAI(temperature=0, max_tokens=1000)
repl_tool = PythonREPLTool()
agent_executor = create_python_agent(llm=agent_llm, tool=repl_tool, verbose=True)

# Example: ask a maths question for the agent to solve.
question = "Find the roots (zeros) of the quadratic function 3 * x**2 + 2*x -1"
agent_executor.run(question)