ソースを参照

added results-parser routine and import custom tg_youtube_search.py library

Miek Stagl 5 年 前
コミット
d2e7a091e5

BIN
.CHANGELOG.swp


BIN
.config.ini.swp


BIN
.ytsearch.swp


+ 5 - 2
CHANGELOG

@@ -19,10 +19,13 @@ Version 0.0.3
     - Throws a warning if it cannot create a link
   - modified YoutubeSearch library to add publisher info
     - This will be helpful for narrowing searches based on publisher in 0.0.4
+    - FUTURE - Consider importing the library code directly into this project so that upstream updates cannot break it
 
 Version 0.0.4
-  - Attempts to capture first X results from YoutubeSearch
+  - Capture first X results from YoutubeSearch
     - This script will then parse the results for the best match
     - Remove links for things like (Official Video) or Cover
       - Need to ensure words like "Cover" are not in the actual song title!
-
+  - added tg_youtube_search.py to directory
+     - This is a fork of youtube_search with custom attributes
+     - ytsearch imports this library 

BIN
youtube_search/__pycache__/__init__.cpython-38.pyc → __pycache__/tg_youtube_search.cpython-38.pyc


BIN
__pycache__/youtube_search3.cpython-38.pyc


+ 0 - 0
youtube_search/__init__.py → tg_youtube_search.py


+ 47 - 0
youtube_search2/__init__.py

@@ -0,0 +1,47 @@
+import requests
+from bs4 import BeautifulSoup
+import urllib.parse
+import json
+
+
+class YoutubeSearch:
+
+    def __init__(self, search_terms: str, max_results=None):
+        self.search_terms = search_terms
+        self.max_results = max_results
+        self.videos = self.search()
+
+    def search(self):
+        encoded_search = urllib.parse.quote(self.search_terms)
+        BASE_URL = "https://youtube.com"
+        url = f"{BASE_URL}/results?search_query={encoded_search}&pbj=1"
+        response = BeautifulSoup(requests.get(url).text, "html.parser")
+        results = self.parse_html(response)
+        if self.max_results is not None and len(results) > self.max_results:
+            return results[:self.max_results]
+        return results
+
+    def parse_html(self, soup):
+        results = []
+        for video_div in soup.select("div.yt-lockup-content"):
+            video = video_div.select_one(".yt-uix-tile-link")
+            publisher = video_div.select_one(".yt-lockup-byline")	##
+            if video is not None:
+                if video["href"].startswith("/watch?v="):
+                    channel = video_div.select_one("a.spf-link")
+                    video_info = {
+                        "title": video["title"],
+                        "link": video["href"],
+                        "id": video["href"][video["href"].index("=")+1:],
+                        "channel_name": channel.text,
+                        "channel_link": channel["href"],
+                        "publisher": publisher.text			##
+                    }
+                    results.append(video_info)
+        return results
+
+    def to_dict(self):
+        return self.videos
+
+    def to_json(self):
+        return json.dumps({"videos": self.videos})

BIN
youtube_search2/__pycache__/__init__.cpython-38.pyc


+ 32 - 33
ytsearch

@@ -1,6 +1,6 @@
 #!/usr/bin/python3
 
-from youtube_search import YoutubeSearch
+from tg_youtube_search import YoutubeSearch
 import os
 import configparser
 import sys, getopt
@@ -44,7 +44,7 @@ else:
 	print("Exiting...")
 	exit(1)
 
-VERSION="0.0.3"
+VERSION="0.0.4"
 DOWNLOAD=config['DEFAULT'].getboolean('Download')	#Download True/False
 MUSICFILE=config['DEFAULT']['Musicfile']		#location of text file containing songs
 RETRIES=config['DEFAULT'].getint('Retries')		#Number of retries to search for songs
@@ -223,33 +223,32 @@ def readlist(file):
 	f.close()
 	return music
 
-#def searchlinks(links, artist):
-#	## Takes a list of dictionaries and parses the results
-#	## Discards bad choices
-#	## Returns a dictionary of one entry (best result)
-#	## Good results include published by artist,
-#	## bad results include words live "live" or "Video"
-#	msg("Starting searchlinks", 3)
-#	list_badterms = ["live", "video", "sexy"]
-#
-#	### FIX RANKINGS! ##
-#
-#	for link in links:
-#		rating = 0
-#		for term in list_badterms:
-#			if term.lower() in link['title'].lower():
-##				print("Contains Term!")
-#				rating -= 1
-#		print(rating)
-#		if artist != "":
-#			if artist.lower() == link['publisher'].lower():
-##				print("Published by Artist!")
-#				rating += 10
-#		link["rating"] = rating
-#
-#	links.sort(reverse=True, key = lambda i: i['rating'])	## Sort links based on rating
-#	msg("Ending serachlinks", 3)
-#	return links[0]
+def searchlinks(links, artist):
+	## Takes a list of dictionaries and parses the results
+	## Discards bad choices
+	## Returns a dictionary of one entry (best result)
+	## Good results include published by artist,
+	## bad results include words like "live" or "Video"
+	msg("Starting searchlinks", 3)
+	list_badterms = ["live", "video", "sexy"]
+
+	### FIX RANKINGS! ##
+
+	for link in links:
+		rating = 0
+		for term in list_badterms:
+			if term.lower() in link['title'].lower():
+#				print("Contains Term!")
+				rating -= 1
+		if artist != "":
+			if artist.lower() == link['publisher'].lower():
+#				print("Published by Artist!")
+				rating += 10
+		link["rating"] = rating
+
+	links.sort(reverse=True, key = lambda i: i['rating'])	## Sort links based on rating
+	msg("Ending searchlinks", 3)
+	return links[0]
 
 def generatelink(searchterm, max_results=10, tries=5):
 	## This will retry the link generation routine up to *tries* times and return results
@@ -288,11 +287,11 @@ def parselist(musiclist):
 	  dictlink={}
 
 	  try:
-##	    ytresult = generatelink(searchterm)
-##	    bestlink = searchlinks(ytresult, song['Artist'])
 	    ytresult = generatelink(searchterm)
-	    link = 'https://youtube.com' + ytresult[0]['link']
-#	    link = 'https://youtube.com' + bestlink['link']
+	    bestlink = searchlinks(ytresult, song['Artist'])
+#	    ytresult = generatelink(searchterm)
+#	    link = 'https://youtube.com' + ytresult[0]['link']
+	    link = 'https://youtube.com' + bestlink['link']
 	    logresults.append(song['Title'] + ", " + song['Artist'] + " Link Created")
 	    if DOWNLOAD:
               msg("Attempting to download " + song['Title'], 2)