# __init__.py — youtube-search: scrape YouTube search results without the Data API.
import json
import urllib.parse

import requests
from bs4 import BeautifulSoup
  5. class YoutubeSearch:
  6. def __init__(self, search_terms: str, max_results=None):
  7. self.search_terms = search_terms
  8. self.max_results = max_results
  9. self.videos = self.search()
  10. def search(self):
  11. encoded_search = urllib.parse.quote(self.search_terms)
  12. BASE_URL = "https://youtube.com"
  13. url = f"{BASE_URL}/results?search_query={encoded_search}&pbj=1"
  14. response = BeautifulSoup(requests.get(url).text, "html.parser")
  15. results = self.parse_html(response)
  16. if self.max_results is not None and len(results) > self.max_results:
  17. return results[:self.max_results]
  18. return results
  19. def parse_html(self, soup):
  20. results = []
  21. for video_div in soup.select("div.yt-lockup-content"):
  22. video = video_div.select_one(".yt-uix-tile-link")
  23. publisher = video_div.select_one(".yt-lockup-byline") ##
  24. if video is not None:
  25. if video["href"].startswith("/watch?v="):
  26. channel = video_div.select_one("a.spf-link")
  27. video_info = {
  28. "title": video["title"],
  29. "link": video["href"],
  30. "id": video["href"][video["href"].index("=")+1:],
  31. "channel_name": channel.text,
  32. "channel_link": channel["href"],
  33. "publisher": publisher.text ##
  34. }
  35. results.append(video_info)
  36. return results
  37. def to_dict(self):
  38. return self.videos
  39. def to_json(self):
  40. return json.dumps({"videos": self.videos})