Commit e8ad6894 by Mingju

Initial commit

parents
import aiohttp
from chroma_memory_stream import MemoryStream
# Path of the API-key file; handed to the run_gpt_* prompt helpers as
# their `api_key_f` argument.
api_file = 'key.txt'
def run_gpt_event_poignancy_prompt(api_key_f, record):
    """Placeholder for the GPT prompt that scores an event's poignancy.

    Args:
        api_key_f: Path of the API-key file.
        record: The event record to score.
            NOTE(review): its schema is not defined in the visible code.

    Raises:
        NotImplementedError: Always; the prompt is not implemented yet.
    """
    raise NotImplementedError()
def run_gpt_chat_poignancy_prompt(api_key_f, chat_history):
    """Placeholder for the GPT prompt that scores a chat's poignancy.

    Args:
        api_key_f: Path of the API-key file.
        chat_history: The chat transcript to score.

    Raises:
        NotImplementedError: Always; the prompt is not implemented yet.
    """
    raise NotImplementedError()
def run_gpt_summary_event_prompt(api_key_f, record):
    """Placeholder for the GPT prompt that summarises an event record.

    Args:
        api_key_f: Path of the API-key file.
        record: The event record to summarise.
            NOTE(review): its schema is not defined in the visible code.

    Raises:
        NotImplementedError: Always; the prompt is not implemented yet.
    """
    raise NotImplementedError()
def run_gpt_summary_chat_prompt(api_key_f, chat_history):
    """Placeholder for the GPT prompt that summarises a chat transcript.

    Args:
        api_key_f: Path of the API-key file.
        chat_history: The chat transcript to summarise.

    Raises:
        NotImplementedError: Always; the prompt is not implemented yet.
    """
    raise NotImplementedError()
def parse_observations(observation: dict):
    """Render an observation dict as a multi-line text description.

    Args:
        observation: Mapping that must provide the keys ``name``,
            ``entities``, ``health``, ``food``, ``saturation`` and
            ``oxygen``.

    Returns:
        A string made of three newline-terminated lines: the name, the
        entities, and the four vital statistics separated by spaces.

    Raises:
        KeyError: If one of the required keys is missing.
    """
    lines = [
        f"Name: {observation['name']}",
        f"Entities: {observation['entities']}",
        (
            f"Health: {observation['health']} Food: {observation['food']} "
            f"Saturation: {observation['saturation']} "
            f"Oxygen: {observation['oxygen']}"
        ),
    ]
    # Every line, including the last one, is terminated by a newline.
    return "".join(f"{line}\n" for line in lines)
class MemoryAPIs(object):
    """Entry points for writing records into a ``MemoryStream``.

    Only ``add_chat_to_mem`` has a body so far; the remaining methods are
    placeholders.
    """

    def __init__(self, db_file_name, collection_name) -> None:
        """Create the backing ``MemoryStream``.

        Args:
            db_file_name: Forwarded to ``MemoryStream`` as ``db_file_name``.
            collection_name: Forwarded to ``MemoryStream`` as
                ``collection_name``.
        """
        self.memory_stream = MemoryStream(db_file_name=db_file_name,
                                          collection_name=collection_name)

    async def receive_web_info(self, ip, port):
        """Placeholder coroutine; currently does nothing."""
        pass

    def add_chat_to_mem(self, chat_history, now_time):
        """
        Add chat history to the memory.

        The *chat_history* should be formulated as below:
            Ann: How's everything going?
            Bill: Fine.
            ...

        The summary and poignancy of the chat are obtained from the
        run_gpt_* helpers, then everything is stored as a 'chat' record
        whose code field is the string 'NULL'.

        Raises:
            NotImplementedError: For as long as the run_gpt_* helpers are
                unimplemented placeholders.
        """
        r_type = 'chat'
        description = chat_history
        code = 'NULL'
        summary = run_gpt_summary_chat_prompt(api_key_f=api_file,
                                              chat_history=description)
        poignancy = run_gpt_chat_poignancy_prompt(api_key_f=api_file,
                                                  chat_history=description)
        self.memory_stream.add_record_in_mem(r_type, description, code,
                                             summary, now_time, poignancy)

    def add_event_to_mem(self, observation, now_time):
        """
        Add observation to the memory.

        The *observation* is a python dict.
        """
        # TODO: not implemented yet; the record type is meant to be 'event'.
        pass

    def add_skill_to_mem(self, skill_info, now_time):
        """Placeholder; currently does nothing."""
        pass

    def add_bug_info_to_mem(self, bug_info, now_time):
        """Placeholder; currently does nothing."""
        pass

    def delete_record_in_mem(self):
        """Placeholder; currently does nothing."""
        pass

    def retrieve_mem(self):
        """Placeholder for memory retrieval.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError
\ No newline at end of file
import chromadb
import openai
from chromadb.utils import embedding_functions
# Sample records fed to the __main__ smoke test at the bottom of this module.
example = {
    'node_id': '3',
    'node_type': 'chat',
    'description': 'test',
    'summary': 'tst',
    'code': 'NULL',
    'last_accessed': '2023-1-10',
    'created': '2023-1-0',
    'embeddings': [1, 2, 3],
    'poignancy': 10,
}
example1 = {
    'node_id': '2',
    'node_type': 'chat',
    'description': 'test',
    'summary': 'ppp',
    'code': 'NULL',
    'last_accessed': '2023-1-10',
    'created': '2023-1-0',
    'embeddings': [1, 2, 3],
    'poignancy': 10,
}
example2 = {
    'node_id': '10',
    'node_type': 'chat',
    'description': 'test',
    'summary': 'tst',
    'code': 'NULL',
    'last_accessed': '2023-1-10',
    'created': '2023-1-0',
    'embeddings': [1, 2, 3],
    'poignancy': 10,
}
def get_openai_key(key_path: str):
    """Return the content of the file at *key_path*, stripped of
    leading and trailing whitespace.
    """
    with open(key_path, 'r') as key_file:
        return key_file.read().strip()
def get_collection(db_file_name: str, collection_name: str):
    """
    Return the collection named *collection_name* from the persistent
    Chroma database stored at *db_file_name*, creating it if needed.

    The collection is requested with the "cosine" HNSW space and the
    default embedding function.
    """
    # NOTE: an OpenAI embedding function (model "text-embedding-ada-002",
    # key read from "key.txt") is the disabled alternative to the default
    # embedding function used below.
    embedder = embedding_functions.DefaultEmbeddingFunction()
    persistent_client = chromadb.PersistentClient(path=db_file_name)
    return persistent_client.get_or_create_collection(
        name=collection_name,
        metadata={"hnsw:space": "cosine"},
        embedding_function=embedder,
    )
def add_record(collection: chromadb.Collection, records: list[dict]):
    """
    Insert *records* into *collection* with a single ``add`` call.

    Each record's ``node_id`` becomes its id. The document is the record's
    *description* for 'skill' nodes and its *summary* otherwise. The
    remaining fields (node_type, last_accessed, created, poignancy, code,
    description) are stored as metadata. No embeddings are passed here, so
    computing them is left to the collection.
    """
    metadata_keys = ('node_type', 'last_accessed', 'created',
                     'poignancy', 'code', 'description')
    record_ids, documents, metadata_rows = [], [], []
    for rec in records:
        record_ids.append(rec['node_id'])
        metadata_rows.append({key: rec[key] for key in metadata_keys})
        # Skills are indexed by their description, everything else by summary.
        text_field = 'description' if rec['node_type'] == 'skill' else 'summary'
        documents.append(rec[text_field])
    collection.add(ids=record_ids, metadatas=metadata_rows,
                   documents=documents)
def delete_record(collection: chromadb.Collection, records: list[dict]):
    """
    Delete from *collection* every record whose id equals the ``node_id``
    of one of *records*.

    Per the original author's note, ids that are not present in the
    collection are ignored.
    """
    doomed_ids = [rec['node_id'] for rec in records]
    collection.delete(ids=doomed_ids)
def query_relevance_record(collection: chromadb.Collection, keys: list[str], topK: int = 50):
    """
    Query *collection* with the search texts *keys* and return the raw
    query result, asking for *topK* results (50 by default).
    """
    query_result = collection.query(query_texts=keys, n_results=topK)
    return query_result
def query_most_piognancy_record(collection: chromadb.Collection, poignancy: int = 10):
    """
    Query the records which have the highest poignancy values.

    Args:
        collection: The collection to read from.
        poignancy: Metadata value a record's ``poignancy`` must equal to be
            returned. Defaults to 10, the value that was previously
            hard-coded here.
            NOTE(review): presumably 10 is the top of the poignancy scale;
            confirm.

    Returns:
        The raw result of ``collection.get`` for that metadata filter.
    """
    return collection.get(where={"poignancy": poignancy})
def update_record(collection: chromadb.Collection, records: list[dict]):
    """
    Update *records* in *collection* with a single ``update`` call.

    Records are matched by ``node_id``. The document is the record's
    *description* for 'skill' nodes and its *summary* otherwise. The
    remaining fields (node_type, last_accessed, created, poignancy, code,
    description) are written as metadata.
    """
    metadata_keys = ('node_type', 'last_accessed', 'created',
                     'poignancy', 'code', 'description')
    record_ids, documents, metadata_rows = [], [], []
    for rec in records:
        record_ids.append(rec['node_id'])
        metadata_rows.append({key: rec[key] for key in metadata_keys})
        # Skills are indexed by their description, everything else by summary.
        text_field = 'description' if rec['node_type'] == 'skill' else 'summary'
        documents.append(rec[text_field])
    collection.update(ids=record_ids, documents=documents,
                      metadatas=metadata_rows)
if __name__ == "__main__":
    # Manual smoke test: store the three sample records in the 'test'
    # collection of the 'test' database, then print a relevance query and
    # the highest-poignancy lookup. update_record / delete_record are not
    # exercised here.
    collection = get_collection('test', 'test')
    add_record(collection, [example, example1, example2])
    relevant = query_relevance_record(collection, "tst")
    print(relevant)
    most_poignant = query_most_piognancy_record(collection)
    print(most_poignant)
from chroma_db import *
import json
class ConceptNode(object):
    """A single memory record: id, type, texts, code, poignancy and the
    last-accessed / created timestamps.
    """

    # Attribute names, in the key order produced by to_json().
    _FIELDS = ('node_id', 'node_type', 'description', 'summary',
               'code', 'poignancy', 'last_accessed', 'created')

    def __init__(self, node_id, node_type,
                 description, summary, code,
                 poignancy, last_accessed, created):
        """Store every argument on the attribute of the same name."""
        self.node_id, self.node_type = node_id, node_type
        self.description, self.summary = description, summary
        self.code, self.poignancy = code, poignancy
        self.last_accessed, self.created = last_accessed, created

    def to_json(self):
        """Return the node as a plain dict keyed by attribute name."""
        return {name: getattr(self, name) for name in self._FIELDS}
class MemoryStream(object):
    """Writes ``ConceptNode`` records into a Chroma collection, assigning
    each new record the next value of a running counter as its id.
    """

    def __init__(self, db_file_name, collection_name):
        """Open the collection and start the id counter at 1.

        Args:
            db_file_name: Passed to ``get_collection``.
            collection_name: Passed to ``get_collection``.
        """
        self.collection = get_collection(db_file_name, collection_name)
        # NOTE(review): the counter restarts at 1 for every new instance,
        # even if the collection already holds records -- confirm that ids
        # cannot collide with previously stored ones.
        self.now_id = 1

    def _store_node(self, r_type, description, code,
                    summary, now_time, poignancy):
        """Build a ConceptNode with a fresh id and add it to the collection."""
        # Chroma record ids are strings, so the counter value is converted.
        node_id = str(self.now_id)
        self.now_id += 1
        node = ConceptNode(node_id, r_type, description,
                           summary, code, poignancy,
                           last_accessed=now_time, created=now_time)
        # add_record iterates over a list of record dicts, so the single
        # record has to be wrapped in a list.
        add_record(self.collection, [node.to_json()])

    def add_record_in_mem(self, r_type, description, code,
                          summary, now_time, poignancy):
        """Store one record of type *r_type*.

        *now_time* is used for both the ``last_accessed`` and the
        ``created`` field.
        """
        self._store_node(r_type, description, code,
                         summary, now_time, poignancy)

    def add_bug_report_in_mem(self, description, summary,
                              now_time):
        """Store a bug report as an 'event' record with code 'NULL' and
        poignancy 10.
        """
        self._store_node('event', description, 'NULL',
                         summary, now_time, 10)
import numpy as np
from chroma_db import *
import chromadb
# Base of the per-rank exponential decay used by extract_recency().
recency_decay = 0.9
# Weights applied by retrieve() to [relevance, poignancy, recency].
gw = [1, 1, 1]
# NOTE(review): not referenced by any visible code; presumably the upper
# bound of the poignancy scale -- confirm.
max_poig = 10
def extract_relevance(collection: chromadb.Collection, msgs: list[str]):
    """
    Query *collection* for up to 50 records matching *msgs*.

    Returns a pair ``(relevance, records)``: *records* is the raw query
    result and *relevance* holds ``1 - distance`` for every hit of the
    first query text.
    """
    hits = query_relevance_record(collection, msgs, topK=50)
    similarities = [1 - dist for dist in hits['distances'][0]]
    return similarities, hits
def extract_poignance(records: list[dict]):
    """
    Return the ``poignancy`` metadata value of every hit of the first
    query in *records*, the result returned by **extract_relevance**.
    """
    return [meta['poignancy'] for meta in records['metadatas'][0]]
def extract_most_poignant(collection: chromadb.Collection):
    """
    Retrieve the records with the highest poignancy.

    Returns:
        Whatever ``query_most_piognancy_record`` returns for *collection*.
    """
    return query_most_piognancy_record(collection)
def extract_recency(records: list[dict], decay=None):
    """
    Extract the recency value from records extracted
    from **extract_relevance**.

    The hits of the first query are ranked by their ``last_accessed``
    metadata, most recent first, and the hit at rank ``r`` gets the score
    ``decay ** r``. Scores are returned in the same order as the hits, so
    that ``result[i]`` belongs to ``records['metadatas'][0][i]``.

    Args:
        records: Query result holding the hits under ``['metadatas'][0]``.
        decay: Base of the exponential decay. When omitted, the
            module-level ``recency_decay`` is used.

    Returns:
        A list of recency scores, one per hit.

    NOTE(review): ``last_accessed`` values are compared as stored; if they
    are non-padded date strings such as '2023-1-10' the ordering is
    lexicographic, not chronological -- confirm the timestamp format.
    """
    if decay is None:
        decay = recency_decay
    last_accessed = [meta['last_accessed'] for meta in records['metadatas'][0]]
    # Hit positions from most to least recently accessed; the sort is
    # stable, so ties keep their original relative order.
    by_recency = sorted(range(len(last_accessed)),
                        key=last_accessed.__getitem__, reverse=True)
    scores = [0.0] * len(last_accessed)
    for rank, position in enumerate(by_recency):
        # Write the score back at the hit's own position so it stays
        # aligned with the relevance and poignancy lists.
        scores[position] = decay ** rank
    return scores
def top_highest_k_values(scores, topK=30):
    """Return the *topK* largest scores as ``(index, score)`` tuples,
    highest first; equal scores keep their original order.
    """
    ranked = sorted(enumerate(scores), key=lambda pair: pair[1], reverse=True)
    return ranked[:topK]
def get_res(records, idx):
    """Rebuild the record dict for hit *idx* of the first query in *records*.

    The id comes from ``records['ids']`` and the other fields from the
    hit's metadata. For every node type except 'skill' the stored document
    is added as ``summary``.
    """
    res = {'node_id': records['ids'][0][idx]}
    meta = records['metadatas'][0][idx]
    for field in ('node_type', 'last_accessed', 'created',
                  'poignancy', 'code', 'description'):
        res[field] = meta[field]
    if res['node_type'] != 'skill':
        res['summary'] = records['documents'][0][idx]
    return res
def retrieve(collection: chromadb.Collection, msgs: list[str], topK=2):
    """Return the *topK* best-scoring records for *msgs*.

    Each hit's score is the ``gw``-weighted sum of its relevance,
    poignancy and recency; the winners are rebuilt with ``get_res``.

    NOTE(review): poignancy enters the sum as stored, while relevance is
    ``1 - distance`` and recency is a power of the decay base; ``max_poig``
    is never applied -- confirm whether poignancy should be normalised.
    """
    relevance, relevance_records = extract_relevance(collection, msgs)
    poignance = extract_poignance(relevance_records)
    recency = extract_recency(relevance_records)
    scores = [
        rel * gw[0] + poignance[pos] * gw[1] + recency[pos] * gw[2]
        for pos, rel in enumerate(relevance)
    ]
    best = top_highest_k_values(scores, topK=topK)
    return [get_res(relevance_records, pos) for pos, _score in best]
if __name__ == '__main__':
    # Manual smoke test against the 'test' collection of the 'test' database.
    collection = get_collection('test', 'test')
    hits = retrieve(collection, ['ppp'])
    print(hits)
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment