#!/usr/bin/env python3
"""Download songs from YouTube Music, tag them, and file them by artist/album.

Workflow when run as a script:

1. Resolve the URLs to download, either from a playlist/song URL or from a
   retry file containing one URL per line.
2. Download every URL as mp3 via youtube_dl, writing a ``.info.json`` file
   next to each download.
3. Record URLs that failed so that they can be retried later.
4. Copy title/artist/album/year from each ``.info.json`` file into the mp3
   tags, then move the mp3 to ``<music dir>/<artist>/<album>/``.
5. Remove the JSON files left in the music directory.
"""
import argparse
import datetime
import glob
import json
import os
import sys
from pathlib import Path

import music_tag
import youtube_dl

DEBUG = False

# File (inside the music directory) that collects the URLs that failed.
FAILED_URLS_FILENAME = "jamos_failed_urls.txt"

# Ordered (old, new) substitutions used to build file-system friendly names.
# Order matters: '-' and '/' become spaces first, and spaces then become '_'.
_FILENAME_REPLACEMENTS = (
    ('-', ' '),
    ('(', ''),
    (')', ''),
    ('/', ' '),
    (' ', '_'),
    ("'", ''),
    ("&", 'and'),
    (chr(8217), ''),  # right single quotation mark (typographic apostrophe)
    ('$', 's'),
    ('.', ''),
)


def cleanup_metadata_files(music_directory):
    """Delete every ``*.json`` file directly inside *music_directory*.

    NOTE: this removes all JSON files in that directory, not only the
    ``.info.json`` files written by the downloader.
    """
    for json_path in glob.glob(os.path.join(music_directory, '*.json')):
        os.remove(json_path)


def format_youtube_date(date):
    """Return the year of a ``YYYYMMDD`` date string.

    Returns the year as an ``int``. If *date* cannot be parsed, the error is
    printed and the string ``"Unknown Year"`` is returned instead.
    """
    try:
        return datetime.datetime.strptime(date, "%Y%m%d").year
    except (TypeError, ValueError) as ex:
        print(ex)
        return "Unknown Year"


def get_all_files(directory):
    """Return the paths of all regular ``*.mp3`` files directly in *directory*."""
    candidates = glob.glob(os.path.join(directory, '*.mp3'))
    return [path for path in candidates if os.path.isfile(path)]


def get_command_line_options():
    """Parse and validate the command line.

    Exactly one of the positional ``url`` and ``--retryFile`` must be given.

    Returns:
        The parsed ``argparse.Namespace``.

    Raises:
        Exception: if both or neither of ``url`` / ``--retryFile`` were
            passed. The problem and the usage text are printed first.
    """
    parser = argparse.ArgumentParser(
        description="Download songs from YouTube Music")
    parser.add_argument("url",
                        metavar="https://music.youtube.com/playlist?list=1234",
                        nargs='?',
                        default=None,
                        type=str,
                        help="Playlist or Song URL to download.")
    parser.add_argument("-c", "--cookies",
                        metavar="cookiefile.txt",
                        type=str,
                        help="Cookie file to use.")
    parser.add_argument("-o", "--output",
                        metavar="output_directory",
                        type=str,
                        help="Output directory to use")
    parser.add_argument("-r", "--retryFile",
                        metavar="jamos_failed_urls.txt",
                        default=None,
                        type=str,
                        help="File of previously failed URLs to retry, "
                             "one URL per line.")
    args = parser.parse_args()

    if args.retryFile and args.url:
        print("Cannot have url and retry flag!\n")
        parser.print_help()
        raise Exception("Cannot have url and retry flag!")
    if not args.retryFile and not args.url:
        print("Must pass either a url or a retry file!\n")
        parser.print_help()
        raise Exception("Must pass either a url or a retry file!")
    return args


def get_playlist_url():
    """Return the first raw command-line argument.

    Legacy helper kept for backward compatibility; the script itself reads
    the URL through ``get_command_line_options``.
    """
    return sys.argv[1]


def get_video_urls_in_playlist(playlist_url, ytdl):
    """Return the page URLs of the videos behind *playlist_url*.

    Args:
        playlist_url: Playlist URL, or the URL of a single song.
        ytdl: Downloader object exposing ``extract_info(url, download=False)``.

    Returns:
        A list with the ``webpage_url`` of every entry that has one. When the
        extracted info has no ``entries`` key (a single song rather than a
        playlist), the info's own ``webpage_url`` is used.
    """
    info = ytdl.extract_info(playlist_url, download=False)
    if not info:
        return []
    # A single song has no 'entries'; treat the info dict as the only entry.
    entries = info['entries'] if 'entries' in info else [info]
    urls = []
    for entry in entries:
        # Skip empty entries instead of crashing on them.
        if entry and entry.get('webpage_url') is not None:
            urls.append(entry['webpage_url'])
    return urls


def remove_special_characters_for_filename(filename):
    """Return a lowercase version of *filename* with special characters replaced.

    The substitutions in ``_FILENAME_REPLACEMENTS`` are applied in order.
    """
    new_name = filename
    for old, new in _FILENAME_REPLACEMENTS:
        new_name = new_name.replace(old, new)
    return new_name.lower()


def move_file(file, metadata, output_directory):
    """Move *file* to ``<output_directory>/<artist>/<album>/``.

    The target name is ``<artist>_<album>_<title>.mp3``, built from the
    sanitized ``artist``, ``album`` and ``title`` values of *metadata*.
    Missing directories are created.
    """
    artist = remove_special_characters_for_filename(metadata['artist'])
    album = remove_special_characters_for_filename(metadata['album'])
    title = remove_special_characters_for_filename(metadata['title'])

    final_directory = os.path.join(output_directory, artist, album)
    Path(final_directory).mkdir(parents=True, exist_ok=True)

    # TODO: Research converting to mp3 instead of just naming it such.
    # TODO: Research better file formats over mp3?
    os.rename(
        file,
        os.path.join(final_directory,
                     '{}_{}_{}.mp3'.format(artist, album, title)))


def write_metadata_to_song_file(filename, metadata):
    """Write title, artist, album and year from *metadata* into *filename*'s tags."""
    song = music_tag.load_file(filename)
    song['name'] = metadata['title']
    song['artist'] = metadata['artist']
    song['album'] = metadata['album']
    song['year'] = metadata['year']
    song.save()


def get_song_metadata_from_json(json, counter):
    """Build the tag metadata for one song from its info dict.

    Args:
        json: Dict loaded from the song's ``.info.json`` file. (The name
            shadows the ``json`` module inside this function; it is kept
            because it is part of the signature.)
        counter: Number used in the fallback ``jamos_filename``.

    Returns:
        A dict with the keys ``title``, ``artist``, ``album``, ``year`` and
        ``jamos_filename``. Keys missing from *json*, or set to ``None``,
        keep their placeholder defaults.
    """
    metadata = {
        'title': 'unknownsong',
        'artist': 'unknownartist',
        'album': 'unknownalbum',
        'year': 1999,
        'jamos_filename': 'jamos_unknwon_file_number_{}.mp3'.format(counter),
    }
    try:
        if json.get('artist') is not None:
            # If there are multiple artists, pick the first one.
            # NOTE: This will break if the artist has a comma in their name.
            metadata['artist'] = json['artist'].split(',')[0]
        if json.get('album') is not None:
            metadata['album'] = json['album']
        if json.get('title') is not None:
            metadata['title'] = json['title']
        if json.get('release_date') is not None:
            metadata['year'] = format_youtube_date(json['release_date'])

        artist_for_filename = metadata['artist'].replace(' ', '_').lower()
        title_for_filename = metadata['title'].replace(' ', '_').lower()
        metadata['jamos_filename'] = '{}_{}.mp3'.format(artist_for_filename,
                                                        title_for_filename)
    except Exception as ex:
        # Deliberately best-effort: report the problem and return whatever
        # was collected so far rather than failing the whole song.
        print(ex)
    return metadata


def create_downloader(music_directory, cookies):
    """Return a ``youtube_dl.YoutubeDL`` that saves mp3 audio in *music_directory*.

    Args:
        music_directory: Directory the downloads are written to.
        cookies: Path handed to youtube_dl as its cookie file.
    """
    audio_options = {
        'format': 'mp3/bestaudio/best',
        'cookiefile': cookies,
        # Join with a path separator so files land inside music_directory
        # even when it has no trailing slash.
        'outtmpl': os.path.join(music_directory, '%(title)s.%(ext)s'),
        'postprocessors': [
            {
                'key': 'FFmpegExtractAudio',
                'preferredcodec': 'mp3',
                'preferredquality': '192',
            },
            {'key': 'FFmpegMetadata'},
        ],
        # The .info.json files are the metadata source for the tagging step.
        'writeinfojson': True,
        'quiet': not DEBUG,
    }
    return youtube_dl.YoutubeDL(audio_options)


def save_urls_from_playlist_to_file(filename, urls, mode="a"):
    """Write *urls* to *filename*, one per line.

    Args:
        filename: File to write to.
        urls: Iterable of URL strings.
        mode: ``open`` mode; the default ``"a"`` appends to an existing file.

    Raises:
        Exception: any error from opening or writing the file is printed and
            then re-raised.
    """
    try:
        with open(filename, mode) as url_file:
            url_file.writelines(url + '\n' for url in urls)
    except Exception as e:
        print(e)
        raise


def _download_urls(ytdl, urls):
    """Download every URL in *urls* and return the list of URLs that failed."""
    failed_urls = []
    for url in urls:
        try:
            print("Downloading: {}...".format(url), end='')
            ytdl.extract_info(url, download=True)
            print("Done.")
        except Exception as ex:
            # One bad URL must not stop the remaining downloads.
            print(ex)
            print("Could not download: {}".format(url))
            failed_urls.append(url)
    return failed_urls


def _tag_and_move_files(music_directory):
    """Tag every mp3 in *music_directory* from its info JSON and file it away."""
    counter = 1
    for mp3_path in get_all_files(music_directory):
        try:
            print("Adding metadata to {} ...".format(mp3_path), end="")
            # Swap only the extension; str.replace would also rewrite any
            # other '.mp3' occurring earlier in the path.
            info_path = os.path.splitext(mp3_path)[0] + '.info.json'
            with open(info_path) as json_file:
                json_data = json.load(json_file)
            metadata = get_song_metadata_from_json(json_data, counter)
            write_metadata_to_song_file(mp3_path, metadata)
            print("Done")
            print("Moving file...", end="")
            move_file(mp3_path, metadata, music_directory)
            print("Done")
            counter += 1
        except Exception as e:
            # just gonna print this and move on to the next file.
            print(e)


def main():
    """Run the download, tag, move and clean-up workflow."""
    try:
        args = get_command_line_options()
    except Exception:
        # The problem and the usage text were already printed.
        sys.exit(1)

    home = os.path.expanduser("~")
    music_directory = args.output or os.path.join(home, "Music")
    cookies = args.cookies or os.path.join(home, "cookies.txt")

    # From some testing, if your playlist is public, you don't have to use a
    # cookie file. Youtube-dl doesn't break or throw if the file doesn't exist.
    ytdl = create_downloader(music_directory, cookies)

    urls = []
    if args.url:
        print("Downloading urls...", end="")
        urls = get_video_urls_in_playlist(args.url, ytdl)
        print("Done.")
    elif args.retryFile:
        with open(args.retryFile) as retry_file:
            # Skip blank lines so they are not treated as URLs.
            urls = [line for line in retry_file.read().splitlines()
                    if line.strip()]

    failed_urls = _download_urls(ytdl, urls)

    failed_path = os.path.join(music_directory, FAILED_URLS_FILENAME)
    try:
        if failed_urls:
            print("Saving failed urls to txt file.")
            # When retrying from the failed-urls file itself, every URL in it
            # has just been attempted, so replace its contents rather than
            # appending duplicates next to URLs that have now succeeded.
            retrying_same_file = bool(args.retryFile) and (
                os.path.realpath(args.retryFile)
                == os.path.realpath(failed_path))
            save_urls_from_playlist_to_file(
                failed_path, failed_urls,
                mode="w" if retrying_same_file else "a")
        elif args.retryFile:
            # Just because we don't have any failed urls in this run, doesn't
            # mean that we can get rid of the retry file. We'll only remove it
            # if it's been explicitly tried and we have no failed urls.
            # We've successfully downloaded all of the previously failed urls.
            # Delete the file
            os.remove(args.retryFile)
    except Exception as ex:
        print(ex)
        print("Saving failed urls to file failed! Printing failed urls:")
        for url in failed_urls:
            print(url)

    _tag_and_move_files(music_directory)

    print("Cleaning up JSON files...", end='')
    cleanup_metadata_files(music_directory)
    print("Done.")


if __name__ == "__main__":
    main()