Skip to content

Commit afb481b

Browse files
Create app.py
Developed by me as a company hiring assessment project to find the best YouTube videos, using the YouTube Data API and the Gemini LLM on the backend.
1 parent a6901bf commit afb481b

File tree

1 file changed

+180
-0
lines changed
  • Youtube_video_finder_using_geminillm

1 file changed

+180
-0
lines changed
Lines changed: 180 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,180 @@
1+
import os
2+
import datetime
3+
from googleapiclient.discovery import build
4+
import google.generativeai as genai
5+
6+
# ——— CONFIG ———
7+
# Initialize clients with environment variables
8+
# YouTube Data API v3 client used by search_videos(). The key is read with
# os.environ[...], so a missing YT_API_KEY raises KeyError at import time.
yt = build("youtube", "v3", developerKey=os.environ["YT_API_KEY"])
9+
10+
# Configure the Google Generative AI client
11+
# The key is read with os.environ[...], so a missing GEMINI_API_KEY raises
# KeyError here, at import time.
genai.configure(api_key=os.environ["GEMINI_API_KEY"])
12+
13+
# Initialize the Gemini model
14+
# Module-level model handle; score_title() calls model.generate_content() on it.
model = genai.GenerativeModel('gemini-1.5-flash-latest')
15+
16+
17+
def _iso8601_duration_to_seconds(value):
    """Convert a YouTube ISO 8601 duration (e.g. "PT5M30S") to whole seconds.

    Handles the minute, hour and day forms ("PT#M#S", "PT#H#M#S",
    "P#DT#H#M#S"). Raises ValueError when the value does not match.
    """
    import re  # local import: the file does not import re at module level

    match = re.fullmatch(
        r"P(?:(\d+)D)?(?:T(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?)?",
        value,
    )
    if match is None:
        raise ValueError(f"unrecognized ISO 8601 duration {value!r}")
    days, hours, minutes, seconds = (int(g) if g else 0 for g in match.groups())
    return ((days * 24 + hours) * 60 + minutes) * 60 + seconds


def search_videos(query, max_filtered_results=20):
    """
    Search for YouTube videos matching a query, filtering by recency and duration.

    Pages through search results (at most 5 pages of 50) using the
    module-level ``yt`` client until enough videos pass the filters or the
    results run out. A video qualifies when it was published within the last
    14 days and is between 4 and 20 minutes long (inclusive).

    Args:
        query: Search query string
        max_filtered_results: Stop once this many qualifying videos are found

    Returns:
        A list of dicts with the keys "id", "title", "duration" (seconds)
        and "publishedAt", in the order the API returned them.
    """
    min_seconds = 4 * 60
    max_seconds = 20 * 60
    max_pages = 5  # Limit the number of pages to search to avoid excessive API calls

    # publishedAfter must be an RFC 3339 timestamp; build it from an aware
    # UTC datetime (datetime.utcnow() is deprecated).
    cutoff = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=14)
    published_after = cutoff.strftime("%Y-%m-%dT%H:%M:%SZ")

    filtered_videos = []
    next_page_token = None
    page_count = 0

    # Continue searching until we have enough filtered videos or run out of results
    while len(filtered_videos) < max_filtered_results and page_count < max_pages:
        # Step 1: Search for videos matching the query
        search_response = yt.search().list(
            q=query,
            part="id,snippet",
            type="video",
            order="relevance",
            publishedAfter=published_after,
            maxResults=50,  # Maximum allowed by the API
            pageToken=next_page_token,
        ).execute()
        page_count += 1

        # Step 2: Collect video IDs from this page
        video_ids = [item["id"]["videoId"] for item in search_response.get("items", [])]
        if not video_ids:
            break

        # Step 3: Get details for the fetched videos (search results carry no duration)
        details = yt.videos().list(
            part="contentDetails,snippet",
            id=",".join(video_ids),
        ).execute()

        # Step 4: Filter by duration (4 to 20 minutes inclusive)
        for item in details.get("items", []):
            if len(filtered_videos) >= max_filtered_results:
                break
            try:
                total_seconds = _iso8601_duration_to_seconds(
                    item["contentDetails"]["duration"]
                )
                if not min_seconds <= total_seconds <= max_seconds:
                    continue
                video = {
                    "id": item["id"],
                    "title": item["snippet"]["title"],
                    "duration": total_seconds,
                    "publishedAt": item["snippet"]["publishedAt"],
                }
            except (KeyError, TypeError, ValueError) as e:
                # A malformed item is skipped rather than aborting the whole search.
                print(f"Could not parse duration for video {item.get('id', 'N/A')}: {e}")
                continue
            filtered_videos.append(video)

        # Check if there are more pages of results
        next_page_token = search_response.get("nextPageToken")
        if not next_page_token:
            break

        # Only announce another page when the loop will actually fetch one.
        if len(filtered_videos) < max_filtered_results and page_count < max_pages:
            print(f"Found {len(filtered_videos)} qualifying videos so far. Searching next page...")

    print(f"Search completed. Found {len(filtered_videos)} videos meeting criteria.")
    return filtered_videos
104+
105+
106+
def score_title(title, query):
    """
    Score a video title's relevance to the query using Gemini AI.

    Sends a short prompt to the module-level ``model`` and extracts the first
    number between 0 and 10 from the reply.

    Args:
        title: Video title to rate
        query: The user's search query

    Returns:
        A float clamped to the 1-10 scale requested in the prompt, or 5.0
        (a neutral middle score) when the API call fails or the reply
        contains no usable number.
    """
    import math
    import re

    default_score = 5.0  # Default middle score instead of 0
    prompt = (
        f"Query: {query}\n"
        f"Title: {title}\n"
        "Rate relevance & quality 1–10 (just give the number)."
    )

    # Keep the API call and the .text access in their own try block: .text can
    # itself raise (e.g. ValueError for a blocked/empty response), and the
    # handler must not touch variables that were never assigned.
    try:
        response = model.generate_content(prompt)
        score_text = response.text.strip()
    except Exception as e:
        print(f"Error scoring title '{title}': {e}")
        return default_score

    # Try to extract just the number if there's additional text
    match = re.search(r'\b([0-9]|10)(\.[0-9]+)?\b', score_text)
    try:
        score = float(match.group(0) if match else score_text)
    except ValueError:
        score = math.nan

    # float() accepts "nan"/"inf"; treat those like any other non-numeric reply.
    if not math.isfinite(score):
        print(f"Model returned non-numeric score for '{title}': '{score_text}'")
        return default_score

    # Clamp so an out-of-range reply (e.g. "15" or "100") cannot skew the ranking.
    return min(10.0, max(1.0, score))
132+
133+
134+
def pick_best(query, num_results=20):
    """
    Find and score the best YouTube videos for a query.

    Fetches candidate videos with search_videos(), scores each title with
    score_title(), prints the ranked results, and returns them.

    Args:
        query: Search query string
        num_results: Number of top videos to return (negative values are
            treated as 0)

    Returns:
        The top ``num_results`` video dicts, best score first, each with an
        added "score" key. An empty list when no videos pass the filters.
    """
    import math

    num_results = max(0, num_results)

    # Get more videos than we need to ensure we have enough after scoring.
    # math.ceil keeps the target an integer count rather than a float.
    candidate_target = max(30, math.ceil(num_results * 1.5))
    vids = search_videos(query, max_filtered_results=candidate_target)

    if not vids:
        print("No suitable videos found after applying filters.")
        return []

    # Score each video
    print(f"Scoring {len(vids)} videos...")
    for i, v in enumerate(vids):
        v["score"] = score_title(v["title"], query)
        print(f" Scored video {i+1}/{len(vids)}: '{v['title']}' - Score: {v['score']:.2f}")

    # Sort by score in descending order
    vids.sort(key=lambda x: x.get("score", 0.0), reverse=True)

    # Print the top num_results
    top_videos = vids[:num_results]
    print(f"\n--- Top {len(top_videos)} Relevant Videos ---")

    for i, video in enumerate(top_videos):
        print(f"\n{i+1}.")
        print(f" • Title: {video.get('title', 'N/A')}")
        print(f" • URL: https://youtu.be/{video.get('id', 'N/A')}")
        print(f" • Score: {video.get('score', 0.0):.2f}")
        duration_sec = video.get('duration', 0)
        print(f" • Duration: {duration_sec // 60}m{duration_sec % 60:02d}s")
        print(f" • Published: {video.get('publishedAt', 'N/A')}")

    return top_videos
170+
171+
172+
# —— RUN IT! ——
173+
if __name__ == "__main__":
    # Both API keys must be present before prompting the user.
    # NOTE(review): the module-level client setup already reads these variables
    # with os.environ[...], which raises KeyError on import when one is missing,
    # so this message is presumably never reached in that case - confirm.
    if all(key in os.environ for key in ("YT_API_KEY", "GEMINI_API_KEY")):
        user_query = input("Enter your search (voice-to-text or text): ")
        # Rank and print the 20 best matches for the entered query.
        pick_best(user_query, num_results=20)
    else:
        print("Error: YouTube and/or Gemini API keys not set in environment variables.")

0 commit comments

Comments
 (0)