ytsearch 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388
  1. #!/usr/bin/python3
  2. from tg_youtube_search import YoutubeSearch
  3. import os
  4. import configparser
  5. import sys, getopt
  6. import requests, json
  7. import random, string
  8. import logging
  9. ##Import list of songs, artists from list.txt
  10. ##parse this file into a list of dictionaries
  11. ##for each dictionary pair, search youtube and output formatted links in file
  12. ##
  13. ## - Download over tor
  14. ## - Check if Tor installed
  15. ## - Warn user if NOT using tor
  16. ##
  17. ## - Multi-thread conversion to audio format
  18. ##
  19. ## - Logging
  20. ##
  21. ## - Check for dependencies
  22. ## - Tor?
  23. ## - Write Permissions in log files?
  24. ## - YoutubeSearch
  25. ##
  26. ## - Differentiate between releases and masters when calling api
  27. ##
  28. ## - check for dependencies on start
  29. ## - youtube-dl
  30. ## - tor (if used)
  31. ##
  32. ## - Download the first X results from YoutubeSearch and pick best result
  33. ##
  34. ## - Allow interrupt to stop script (CTRL + C)
  35. ##
  36. ##Vars
  37. if os.path.exists('config.ini'):
  38. config = configparser.ConfigParser()
  39. config.read('config.ini')
  40. else:
  41. print("Config.ini file not found in", os.getcwd())
  42. print("Exiting...")
  43. exit(1)
  44. VERSION="0.0.4"
  45. DOWNLOAD=config['DEFAULT'].getboolean('Download') #Download True/False
  46. MUSICFILE=config['DEFAULT']['Musicfile'] #location of text file containing songs
  47. RETRIES=config['DEFAULT'].getint('Retries') #Number of retries to search for songs
  48. LOGPATH=config['DEFAULT']['LogLocation']
  49. ITERATOR=0 #Number or current tries
  50. STORAGEPATH=config['DEFAULT']['DefaultStoragePath']
  51. VERBOSITY=config['DEFAULT'].getint('Verbosity')
  52. KEY=config['DEFAULT']['Key']
  53. DISCOG=""
  54. DESTFOLDER=""
  55. ALBUM=""
  56. ARTIST=""
  57. MASTER=True
  58. TOR=False
  59. TESTFOLDER=config['DEFAULT'].getboolean('TestFolder')
  60. HELP= "Takes list.txt or Discogs.com Master/Release number \n" \
  61. "And downloads albums by stripping the audio from Yuotube videos. \n" \
  62. "USAGE: ytsearch [-flag] [Discog Num] \n" \
  63. " -h This help file \n" \
  64. " -d --discog set Discog.com Release or Master number \n" \
  65. " -r --release search for Discog.com RELEASE instead of MASTER [default MASTER] \n" \
  66. " -D --download override config.ini and set Download=True \n" \
  67. " -f --file Allows quick download of a single Youtube link \n" \
  68. " -t --tor Will perform actions over tor using torsocks\n" \
  69. # " -D --download override config.ini and set Download=True"
  70. JSONDATA=[]
  71. music=[] # list to hold dictionaries of songnum, Title, Artist
  72. logresults=[] # list to hold results of link creation attempts, and download attempts
  73. linkresults=[] #
  74. completed=[] # list to hold song numbers of completed downloads so they can be removed from MUSICFILE
  75. def msg(message, level):
  76. ##Takes message and log level as arguments
  77. ##Based on verbosity from config.ini, return messages to user
  78. ## 1-ERROR, 2-WARN, 3-INFO, -1 [No flag, always show]
  79. tlevel = {-1: '', 1: "ERROR", 2: "WARN", 3: "INFO"}
  80. if level <= VERBOSITY:
  81. print(tlevel.get(level), message)
  82. ## Add logging ##T
  83. def arguments(argv):
  84. msg("Starting arguments", 3)
  85. try:
  86. opts, args = getopt.getopt(argv, "hvrtDf:d:", ["discog", "release", "help", "download", "version", "file", "tor"])
  87. for opt, arg in opts:
  88. if opt in ('-h', '--help'):
  89. print(HELP)
  90. sys.exit()
  91. elif opt in ("-d", "--discog"):
  92. global DISCOG
  93. DISCOG = arg
  94. msg("Discog number:" + DISCOG, 3)
  95. elif opt in ("-D", "--download"):
  96. global DOWNLOAD
  97. DOWNLOAD = arg
  98. msg("Override DOWNLOAD from agrs", 2)
  99. elif opt in ("-v", "--version"):
  100. msg("Version: " + VERSION, -1)
  101. sys.exit()
  102. elif opt in ("-r", "--release"):
  103. global MASTER
  104. MASTER=False
  105. msg("searching for Release, not Master at Discogs.com", 1)
  106. elif opt in ("-f", "--file"):
  107. msg("call singlesong with: " + arg, 3)
  108. singlesong(arg)
  109. elif opt in ("-t", "--tor"):
  110. msg("perform operations over tor", 1)
  111. if not checktor():
  112. msg("Torsocks not found! Check that torsocks is in /usr/bin/torsocks",1)
  113. else:
  114. TOR=True
  115. pass
  116. except getopt.GetoptError as err:
  117. msg("cannot get arguments, {0}".format(err), 1)
  118. def checktor():
  119. msg("Starting checktor", 3)
  120. if not os.system('which torsocks'):
  121. msg("Torsocks installed!", 3)
  122. return 1
  123. else:
  124. return 0
  125. def fetchjson(discogno, master=True, show=True):
  126. msg("Starting fetchjson", 3)
  127. if master:
  128. url = 'https://api.discogs.com/masters/'
  129. else:
  130. url = 'https://api.discogs.com/releases/'
  131. url = url + discogno
  132. msg("Downloading " + url, 1)
  133. r = requests.get(url)
  134. global JSONDATA
  135. # JSONDATA = r.json()
  136. JSONDATA = json.loads(r.text)
  137. msg("fetchjson complete!", 3)
  138. return JSONDATA
  139. def buildlist(jsondata, write=False):
  140. ## takes raw jsons data from Discogs.com and extracts Album, Artist, and tracklist list
  141. ## passes tracklist to gettracks to create list of track names
  142. ## appends "," + artist to end of track names
  143. ## calls buildfolders to create a home for the new file
  144. ## writes list to list.txt in appropriate folder
  145. msg("Staring Buildlist", 3)
  146. try:
  147. Artist = jsondata['artists'][0]['name']
  148. Album = jsondata['title']
  149. except:
  150. msg("Could not read Artist or Album Name from Jsonfile", 1)
  151. print(sys.exc_info()[0])
  152. sys.exit()
  153. if Artist.find( '(' ) != -1: ## Discovered a Artist 'Tool (2)' (Discogs 1181). This removes ()
  154. Artist = Artist[:Artist.find( '(' )-1]
  155. msg("Correcting Artist name to " + Artist, 2)
  156. global ALBUM
  157. try:
  158. ALBUM = Album
  159. msg("Set ALBUM var to: " + ALBUM, 3)
  160. except Exception as e:
  161. msg("Could not set ALBUM var." + e, 2)
  162. else:
  163. print(Artist)
  164. tracks = gettracks(jsondata['tracklist'])
  165. for i in range(len(tracks)):
  166. tracks[i] = tracks[i] + ", " + Artist
  167. if write:
  168. with open('list.txt', 'w') as f:
  169. for j in range (len(tracks)):
  170. f.write(tracks[j] + "\n")
  171. f.close()
  172. def gettracks(tracks):
  173. ## takes raw json data from Discogs.com and creates a tracklist for the album
  174. ## This will return a list
  175. msg("Starting gettracks", 3)
  176. goodtracks = []
  177. for track in tracks:
  178. goodtracks.append(track['title'].replace(',',''))
  179. return goodtracks
  180. def randomizer(length=8):
  181. #Creates a Random directory name
  182. randoms = string.ascii_letters + string.digits
  183. return ''.join((random.choice(randoms) for i in range(length)))
  184. def buildfolders(artist, album=""):
  185. ## Takes raw json data and creates foldes in parent_directory for Artist/Album
  186. msg("buildfolders started", 3)
  187. if len(ALBUM) == 0:
  188. album="UNKNOWN ALBUM"
  189. else:
  190. album = ALBUM.replace("'","")
  191. # msg("buildfolders local album is set to: " + album, 3)
  192. global DESTFOLDER
  193. DESTFOLDER = artist + "/" + album + "/"
  194. home = os.path.expanduser('~')
  195. if TESTFOLDER:
  196. DESTFOLDER = os.path.join(home, randomizer(), DESTFOLDER)
  197. DESTFOLDER = os.path.join(home, STORAGEPATH, DESTFOLDER)
  198. try:
  199. os.makedirs(DESTFOLDER)
  200. msg("Folder " + DESTFOLDER + " created", 2)
  201. except Exception as e:
  202. msg("Could not create destination folder!", 1)
  203. msg(e, 1)
  204. def readlist(file):
  205. msg("Starting readlist", 3)
  206. ##Open list.txt, read into music[]
  207. if not os.path.exists(MUSICFILE):
  208. msg("List.txt file not found. Exiting", 1)
  209. sys.exit()
  210. songnum = 0
  211. with open(file) as f:
  212. for line in f:
  213. song={}
  214. (key, val) = line.split(", ")
  215. songnum += 1
  216. song['songnum'] = songnum
  217. song['Title'] = key
  218. song['Artist'] = val.rstrip()
  219. music.append(song)
  220. f.close()
  221. return music
  222. def searchlinks(links, artist):
  223. ## Takes a list of dictionaries and parses the results
  224. ## Discards bad choices
  225. ## Returns a dictionary of one entry (best result)
  226. ## Good results include published by artist,
  227. ## bad results include words live "live" or "Video"
  228. msg("Starting searchlinks", 3)
  229. list_badterms = ["live", "video", "sexy"]
  230. ### FIX RANKINGS! ##
  231. for link in links:
  232. rating = 0
  233. for term in list_badterms:
  234. if term.lower() in link['title'].lower():
  235. # print("Contains Term!")
  236. rating -= 1
  237. if artist != "":
  238. if artist.lower() == link['publisher'].lower():
  239. # print("Published by Artist!")
  240. rating += 10
  241. link["rating"] = rating
  242. links.sort(reverse=True, key = lambda i: i['rating']) ## Sort links based on rating
  243. msg("Ending searchlinks", 3)
  244. return links[0]
  245. def generatelink(searchterm, max_results=10, tries=7):
  246. ## This will retry the link generation routine up to *tries* times and return results
  247. msg("Starting generatelink for " + searchterm, 3)
  248. counter = 0
  249. while counter <= tries:
  250. try:
  251. ytresult = YoutubeSearch(searchterm, max_results).to_dict()
  252. if len(ytresult) > 0:
  253. msg("Link Generated!", 3)
  254. break
  255. else:
  256. raise IndexError("Index Empty")
  257. except:
  258. msg("Unable to generate link on try " + str(counter), 3)
  259. counter += 1
  260. if counter >= tries:
  261. msg("Could Not Generate link for " + searchterm, 2)
  262. raise IndexError("Could not Generate Link")
  263. # finally:
  264. # msg("Ending generatelink on try " + str(counter), 3)
  265. # searchlinks(ytresult)
  266. return ytresult
  267. def parselist(musiclist):
  268. msg("Starting parselist", 3)
  269. global ITERATOR
  270. if ITERATOR == 0 and DOWNLOAD: ## <- Original Line
  271. # if ITERATOR == 0: ## <- Used only for testing buildfolders
  272. buildfolders(musiclist[0]['Artist'])
  273. ITERATOR+=1
  274. for song in musiclist:
  275. # searchterm = song['Title'] + " " + song['Artist'] + ' lyrics HD'
  276. searchterm = song['Title'] + " " + song['Artist']
  277. dictlink={}
  278. try:
  279. ytresult = generatelink(searchterm)
  280. bestlink = searchlinks(ytresult, song['Artist'])
  281. # ytresult = generatelink(searchterm)
  282. # link = 'https://youtube.com' + ytresult[0]['link']
  283. link = 'https://youtube.com' + bestlink['link']
  284. logresults.append(song['Title'] + ", " + song['Artist'] + " Link Created")
  285. if DOWNLOAD:
  286. msg("Attempting to download " + song['Title'], 2)
  287. downloadsong(link, song)
  288. else:
  289. print("Not downloading " + song['Title'] + ". Change this in config.ini")
  290. except Exception as ex:
  291. print(song['Title'], ex)
  292. # searchlinks(ytresult, song['Artist'])
  293. if DOWNLOAD:
  294. cleanup(MUSICFILE)
  295. def downloadsong(link, song):
  296. msg("Starling Downloadsong for " + song['Title'], 3)
  297. msg("Downloadsong DESTFOLDER: " + DESTFOLDER, 3)
  298. try:
  299. if TOR:
  300. os.system("torsocks youtube-dl --extract-audio --audio-format best --audio-quality 0 --output ''" + DESTFOLDER + "%(title)s.%(ext)s' --ignore-errors " + link)
  301. else:
  302. os.system("youtube-dl --extract-audio --audio-format best --audio-quality 0 --output '''" + DESTFOLDER + "%(title)s.%(ext)s' --ignore-errors " + link)
  303. completed.append(song['songnum'])
  304. logresults.append(song['Title'] + ", " + song['Artist'] + " Audio downloaded")
  305. msg(song['Title'] + " Download Complete!", 2)
  306. except e as youtubedlexception:
  307. logresults.append(song['Title'] + ", " + song['Artist'] + " FAILED TO DOWNLOAD SONG (youtube-dl)")
  308. print(youtubedlexception)
  309. def singlesong(link):
  310. try:
  311. os.system("youtube-dl --extract-audio --audio-format best --audio-quality 0 --output '%(title)s.%(ext)s' --ignore-errors " + link)
  312. except Exception as e:
  313. msg("Could not download file. " + e, 1)
  314. sys.exit()
  315. def cleanup(file):
  316. print("Cleaning completed files from list")
  317. print("Completed Downloads:", completed)
  318. linenum=0
  319. count=0
  320. with open(file, "r") as f:
  321. lines = f.readlines()
  322. with open(file, "w") as f:
  323. for line in lines:
  324. linenum += 1
  325. if linenum not in completed:
  326. f.write(line)
  327. count += 1
  328. f.close()
  329. if count >=1:
  330. print(count, "TRACKS REMAIN")
  331. print(RETRIES - ITERATOR, "tries remaining")
  332. if ITERATOR <= RETRIES:
  333. print("Retrying")
  334. global music
  335. music = []
  336. parselist(readlist(MUSICFILE))
  337. else:
  338. msg("All downloads complete!", -1)
  339. if __name__ == "__main__":
  340. arguments(sys.argv[1:])
  341. if DISCOG != "":
  342. msg("DISCOG found, fetch json", 3)
  343. buildlist(fetchjson(DISCOG), MASTER)
  344. readlist(MUSICFILE)
  345. parselist(music)
  346. print("ytsearch complete, exiting")