Support YouTube videos

Kaveen Kumarasinghe 2 years ago
parent b60bcfd68d
commit 3ed55b556f

@ -10,9 +10,10 @@ from typing import List, Optional
from pathlib import Path
from datetime import date, datetime
from gpt_index.readers import YoutubeTranscriptReader
from gpt_index.readers.schema.base import Document
from gpt_index import GPTSimpleVectorIndex, SimpleDirectoryReader, QuestionAnswerPrompt, BeautifulSoupWebReader, \
GPTFaissIndex, GPTListIndex, QueryMode, GPTTreeIndex
GPTFaissIndex, GPTListIndex, QueryMode, GPTTreeIndex, GoogleDocsReader
from gpt_index.readers.web import DEFAULT_WEBSITE_EXTRACTOR
from gpt_index.composability import ComposableGraph
@ -78,9 +79,21 @@ class Index_handler:
document = SimpleDirectoryReader(file_path).load_data()
index = GPTSimpleVectorIndex(document)
return index
def index_gdoc(self, doc_id):
    """Index one or more Google Docs into a GPTSimpleVectorIndex.

    Args:
        doc_id: A single Google Docs document ID (str), or an already-built
            collection of document IDs.

    Returns:
        The GPTSimpleVectorIndex built over the documents returned by
        GoogleDocsReader.
    """
    # GoogleDocsReader.load_data expects a collection of document IDs; a bare
    # string would be walked character by character. Wrap a single ID in a
    # list, mirroring how index_youtube_transcript wraps its single link.
    document_ids = [doc_id] if isinstance(doc_id, str) else doc_id
    documents = GoogleDocsReader().load_data(document_ids)
    return GPTSimpleVectorIndex(documents)
def index_youtube_transcript(self, link):
    """Build a GPTSimpleVectorIndex from the documents loaded for ``link``.

    Args:
        link: URL handed to YoutubeTranscriptReader as the sole entry of
            its ``ytlinks`` list.

    Returns:
        The GPTSimpleVectorIndex constructed over the loaded documents.
    """
    transcript_reader = YoutubeTranscriptReader()
    transcript_docs = transcript_reader.load_data(ytlinks=[link])
    return GPTSimpleVectorIndex(transcript_docs)
def index_load_file(self, file_path):
    """Load a previously saved GPTSimpleVectorIndex from ``file_path``.

    Args:
        file_path: Location passed to GPTSimpleVectorIndex.load_from_disk.

    Returns:
        The index returned by GPTSimpleVectorIndex.load_from_disk.
    """
    return GPTSimpleVectorIndex.load_from_disk(file_path)
def index_discord(self, document):
    """Build a GPTSimpleVectorIndex directly from ``document``.

    Args:
        document: Passed unchanged to the GPTSimpleVectorIndex constructor
            (presumably a list of Document objects; verify against caller).

    Returns:
        The newly constructed GPTSimpleVectorIndex.
    """
    return GPTSimpleVectorIndex(document)
@ -128,8 +141,11 @@ class Index_handler:
# TODO Link validation
try:
index = await self.loop.run_in_executor(None, partial(self.index_webpage, link))
# Check if the link contains youtube in it
if "youtube" in link:
index = await self.loop.run_in_executor(None, partial(self.index_youtube_transcript, link))
else:
index = await self.loop.run_in_executor(None, partial(self.index_webpage, link))
# Make the url look nice, remove https, useless stuff, random characters
file_name = link.replace("https://", "").replace("http://", "").replace("www.", "").replace("/", "_").replace("?", "_").replace("&", "_").replace("=", "_").replace("-", "_").replace(".", "_")

@ -33,6 +33,7 @@ dependencies = [
"beautifulsoup4",
"gpt-index",
"PyPDF2",
"youtube_transcript_api",
]
dynamic = ["version"]
[project.scripts]

@ -11,4 +11,5 @@ backoff==2.2.1
flask==2.2.2
beautifulsoup4==4.11.1
gpt-index==0.3.4
PyPDF2==3.0.1
PyPDF2==3.0.1
youtube_transcript_api==0.5.0
Loading…
Cancel
Save