From bea1ff903d683c02ee3de13d0463be3671f42e3e Mon Sep 17 00:00:00 2001
From: Kaveen Kumarasinghe <ocfinancesmc@gmail.com>
Date: Sun, 5 Feb 2023 15:01:25 -0500
Subject: [PATCH] more token usage captures for indexes

---
 models/index_model.py | 38 +++++++++++++++++++++++---------------
 1 file changed, 23 insertions(+), 15 deletions(-)

diff --git a/models/index_model.py b/models/index_model.py
index fd21dea..3dc5516 100644
--- a/models/index_model.py
+++ b/models/index_model.py
@@ -136,20 +136,20 @@ class Index_handler:
         )
 
     # TODO We need to do predictions below for token usage.
-    def index_file(self, file_path) -> GPTSimpleVectorIndex:
+    def index_file(self, file_path, embed_model) -> GPTSimpleVectorIndex:
         document = SimpleDirectoryReader(file_path).load_data()
-        index = GPTSimpleVectorIndex(document)
+        index = GPTSimpleVectorIndex(document, embed_model=embed_model)
         return index
 
-    def index_gdoc(self, doc_id) -> GPTSimpleVectorIndex:
+    def index_gdoc(self, doc_id, embed_model) -> GPTSimpleVectorIndex:
         document = GoogleDocsReader().load_data(doc_id)
-        index = GPTSimpleVectorIndex(document)
+        index = GPTSimpleVectorIndex(document, embed_model=embed_model)
         return index
 
-    def index_youtube_transcript(self, link):
+    def index_youtube_transcript(self, link, embed_model):
         documents = YoutubeTranscriptReader().load_data(ytlinks=[link])
         index = GPTSimpleVectorIndex(
-            documents,
+            documents, embed_model=embed_model,
         )
         return index
 
@@ -160,17 +160,17 @@ class Index_handler:
             index = GPTSimpleVectorIndex.load_from_disk(file_path)
         return index
 
-    def index_discord(self, document) -> GPTSimpleVectorIndex:
+    def index_discord(self, document, embed_model) -> GPTSimpleVectorIndex:
         index = GPTSimpleVectorIndex(
-            document,
+            document, embed_model=embed_model,
         )
         return index
 
-    def index_webpage(self, url) -> GPTSimpleVectorIndex:
+    def index_webpage(self, url, embed_model) -> GPTSimpleVectorIndex:
         documents = BeautifulSoupWebReader(
             website_extractor=DEFAULT_WEBSITE_EXTRACTOR
         ).load_data(urls=[url])
-        index = GPTSimpleVectorIndex(documents)
+        index = GPTSimpleVectorIndex(documents, embed_model=embed_model)
         return index
 
     def reset_indexes(self, user_id):
@@ -215,9 +215,11 @@ class Index_handler:
                     suffix=suffix, dir=temp_path, delete=False
                 ) as temp_file:
                     await file.save(temp_file.name)
+                    embedding_model = OpenAIEmbedding()
                     index = await self.loop.run_in_executor(
-                        None, partial(self.index_file, temp_path)
+                        None, partial(self.index_file, temp_path, embedding_model)
                     )
+                    await self.usage_service.update_usage(embedding_model.last_token_usage)
 
             file_name = file.filename
             self.index_storage[ctx.user.id].add_index(index, ctx.user.id, file_name)
@@ -236,15 +238,17 @@ class Index_handler:
 
         # TODO Link validation
         try:
+            embedding_model = OpenAIEmbedding()
             # Check if the link contains youtube in it
             if "youtube" in link:
                 index = await self.loop.run_in_executor(
-                    None, partial(self.index_youtube_transcript, link)
+                    None, partial(self.index_youtube_transcript, link, embedding_model)
                 )
             else:
                 index = await self.loop.run_in_executor(
-                    None, partial(self.index_webpage, link)
+                    None, partial(self.index_webpage, link, embedding_model)
                 )
+            await self.usage_service.update_usage(embedding_model.last_token_usage)
 
             # Make the url look nice, remove https, useless stuff, random characters
             file_name = (
@@ -282,9 +286,11 @@ class Index_handler:
             document = await self.load_data(
                 channel_ids=[channel.id], limit=1000, oldest_first=False
             )
+            embedding_model = OpenAIEmbedding()
             index = await self.loop.run_in_executor(
-                None, partial(self.index_discord, document)
+                None, partial(self.index_discord, document, embedding_model)
             )
+            await self.usage_service.update_usage(embedding_model.last_token_usage)
             self.index_storage[ctx.user.id].add_index(index, ctx.user.id, channel.name)
             await ctx.respond("Index set")
         except Exception:
@@ -392,9 +398,11 @@ class Index_handler:
             document = await self.load_data(
                 channel_ids=channel_ids, limit=3000, oldest_first=False
             )
+            embedding_model = OpenAIEmbedding()
             index = await self.loop.run_in_executor(
-                None, partial(self.index_discord, document)
+                None, partial(self.index_discord, document, embedding_model)
             )
+            await self.usage_service.update_usage(embedding_model.last_token_usage)
             Path(app_root_path() / "indexes" / str(ctx.guild.id)).mkdir(
                 parents=True, exist_ok=True
             )