#!/usr/bin/env python3
"""Download the songs of a YouTube Music library, tag them and file them.

When run as a script, the library song list is fetched through ``ytmusicapi``,
every song is downloaded as an mp3 with ``youtube_dl``, tagged with
``music_tag`` (title, artist, album, track number, year and album cover) and
moved to ``<music_directory>/<artist>/<album>/<artist>_<album>_<title>.mp3``.
"""
import argparse
import datetime
import glob
import json
import os
import shutil
import sys
from pathlib import Path

import music_tag
import requests
import youtube_dl
from ytmusicapi import YTMusic
# import musicpd

DEBUG = True

# Includes the format characters
YOUTUBE_MUSIC_URL = "https://music.youtube.com/watch?v={}"

# Directory (relative to the working directory) where album covers are cached.
THUMBNAIL_DIRECTORY = './thumbnails/'

# Seconds to wait on the thumbnail server before giving up on a cover.
THUMBNAIL_TIMEOUT_SECONDS = 30

# Cache of album data, keyed by album id and filled in by save_album():
# {
#     'album_id': {
#         'name': '',
#         'tracks': [
#             {'title': '', 'id': '', 'track_num': 1}
#         ],
#         'year': ...,
#         'thumbnail_filepath': ''
#     }
# }
ALBUMS = {}


def cleanup_metadata_files(music_directory):
    """Delete every ``*.json`` file directly inside *music_directory*."""
    pattern = os.path.join(glob.escape(music_directory), '*.json')
    for path in glob.glob(pattern):
        os.remove(path)


def format_youtube_date(date):
    """Return the year of a ``YYYYMMDD`` date string.

    When *date* cannot be parsed, the error is printed and the string
    ``"Unknown Year"`` is returned instead of an int.
    """
    try:
        return datetime.datetime.strptime(date, "%Y%m%d").year
    except (TypeError, ValueError) as ex:
        # TypeError: date is not a string; ValueError: wrong format.
        print(ex)
        return "Unknown Year"


def get_all_files(directory):
    """Return the paths of the regular ``*.mp3`` files in *directory*."""
    pattern = os.path.join(glob.escape(directory), '*.mp3')
    return [path for path in glob.glob(pattern) if os.path.isfile(path)]


def get_command_line_options():
    """Parse the command line and return the argparse namespace.

    Exactly one of the positional ``url`` and ``--retryFile`` must be given.

    Raises:
        ValueError: when both or neither of them were passed. The message
            and the usage text have already been printed at that point.
    """
    parser = argparse.ArgumentParser(
        description="Download songs from YouTube Music")
    parser.add_argument("url",
                        metavar="https://music.youtube.com/playlist?list=1234",
                        nargs='?',
                        default=None,
                        type=str,
                        help="Playlist or Song URL to download.")
    parser.add_argument("-c", "--cookies",
                        metavar="cookiefile.txt",
                        type=str,
                        help="Cookie file to use.")
    parser.add_argument("--headers",
                        metavar="header_path",
                        type=str,
                        help="Header file to use.")
    parser.add_argument("-o", "--output",
                        metavar="output_directory",
                        type=str,
                        help="Output directory to use")
    parser.add_argument("-r", "--retryFile",
                        metavar="jamos_failed_urls.txt",
                        default=None,
                        type=str,
                        help="File of previously failed URLs to retry.")
    args = parser.parse_args()

    if args.retryFile and args.url:
        message = "Cannot have url and retry flag!"
    elif not args.retryFile and not args.url:
        message = "Must pass either a url or a retry file!"
    else:
        return args

    print(message + "\n")
    parser.print_help()
    raise ValueError(message)


def get_song_metadata_from_json(json, counter):
    """Build a tag dictionary from a song's info dictionary.

    Args:
        json: dictionary that may hold 'artist', 'album', 'title' and
            'release_date' entries. (The name shadows the ``json`` module
            inside this function only.)
        counter: number used in the fallback file name.

    Returns:
        A dict with 'title', 'artist', 'album', 'year' and 'jamos_filename'.
        Missing or ``None`` entries keep their placeholder value. Any error
        is printed and whatever was collected so far is returned.
    """
    metadata = {
        'title': 'unknownsong',
        'artist': 'unknownartist',
        'album': 'unknownalbum',
        'year': 1999,
        'jamos_filename': 'jamos_unknwon_file_number_{}.mp3'.format(counter),
    }
    try:
        if json.get('artist') is not None:
            metadata['artist'] = json['artist']
            # If there are multiple artists, pick the first one
            # NOTE: This will break if the artist has a comma in their name
            metadata['artist'] = metadata['artist'].split(',')[0]
        if json.get('album') is not None:
            metadata['album'] = json['album']
        if json.get('title') is not None:
            metadata['title'] = json['title']
        if json.get('release_date') is not None:
            metadata['year'] = format_youtube_date(json['release_date'])

        artist_for_filename = metadata['artist'].replace(' ', '_').lower()
        title_for_filename = metadata['title'].replace(' ', '_').lower()
        metadata['jamos_filename'] = '{}_{}.mp3'.format(artist_for_filename,
                                                        title_for_filename)
    except Exception as ex:
        # Best effort: a malformed info dictionary must not stop the run.
        print(ex)
    return metadata


def create_downloader(music_directory, cookies):
    """Return a ``youtube_dl.YoutubeDL`` set up to extract mp3 audio.

    Downloads are written to ``<music_directory>/<video id>.<ext>`` and
    *cookies* is passed on as the downloader's cookie file.
    """
    audio_options = {
        'format': 'mp3/bestaudio/best',
        'cookiefile': cookies,
        'outtmpl': os.path.join(music_directory, '%(id)s.%(ext)s'),
        'postprocessors': [
            {
                'key': 'FFmpegExtractAudio',
                'preferredcodec': 'mp3',
                'preferredquality': '192',
            },
            {'key': 'FFmpegMetadata'},
        ],
        # 'writeinfojson': True,
        'quiet': not DEBUG
    }
    return youtube_dl.YoutubeDL(audio_options)


def get_video_urls_in_playlist(playlist_url, ytdl):
    """Return the 'webpage_url' of every entry of the playlist that has one."""
    videos = ytdl.extract_info(playlist_url, download=False)
    return [vid['webpage_url']
            for vid in videos['entries']
            if vid.get('webpage_url') is not None]


def move_file(file, metadata, output_directory):
    """Move *file* to ``output_directory/artist/album/artist_album_title.mp3``.

    The artist, album and title come from *metadata* and are cleaned with
    remove_special_characters_for_filename(). Missing directories are
    created.
    """
    artist = remove_special_characters_for_filename(metadata['artist'])
    album = remove_special_characters_for_filename(metadata['album'])
    title = remove_special_characters_for_filename(metadata['title'])
    final_directory = os.path.join(output_directory, artist, album)
    Path(final_directory).mkdir(parents=True, exist_ok=True)
    # TODO: Research converting to mp3 instead of just naming it such.
    # TODO: Research better file formats over mp3?
    # shutil.move (unlike os.rename) also works across filesystems.
    shutil.move(
        file,
        os.path.join(final_directory,
                     '{}_{}_{}.mp3'.format(artist, album, title)))


def save_urls_from_playlist_to_file(filename, urls):
    """Append every url in *urls* to *filename*, one per line.

    Any error is printed and then re-raised.
    """
    try:
        with open(filename, "a") as f:
            for url in urls:
                f.write(url + '\n')
    except Exception as e:
        print(e)
        raise


def remove_special_characters_for_filename(filename):
    """Return a lower-cased copy of *filename* with awkward characters replaced.

    The replacements are applied in order, so characters that are first
    turned into a space ('-', '/') end up as '_'.
    """
    replacements = (
        ('-', ' '),
        ('(', ''),
        (')', ''),
        ('/', ' '),
        (' ', '_'),
        ("'", ''),
        ("&", 'and'),
        (chr(8217), ''),  # right single quotation mark
        ('$', 's'),
        ('.', ''),
    )
    new_name = filename
    for old, new in replacements:
        new_name = new_name.replace(old, new)
    return new_name.lower()


def write_metadata_to_song_file(filename, metadata):
    """Write title, artist, album and year from *metadata* into the file's tags."""
    tagged = music_tag.load_file(filename)
    tagged['name'] = metadata['title']
    tagged['artist'] = metadata['artist']
    tagged['album'] = metadata['album']
    tagged['year'] = metadata['year']
    tagged.save()


def create_youtube_music_api_object(header_path):
    """Return a ``YTMusic`` client created from *header_path*."""
    return YTMusic(header_path)


def _download_thumbnail(thumbnail_url, filename):
    """Download *thumbnail_url* to *filename*, creating its directory."""
    Path(filename).parent.mkdir(parents=True, exist_ok=True)
    # The context manager releases the streamed connection in every case.
    with requests.get(thumbnail_url, stream=True,
                      timeout=THUMBNAIL_TIMEOUT_SECONDS) as r:
        # Check if the image was retrieved successfully
        if r.status_code == 200:
            # Set decode_content value to True,
            # otherwise the downloaded image file's size will be zero.
            r.raw.decode_content = True
            with open(filename, 'wb') as f:
                shutil.copyfileobj(r.raw, f)
            print('Image sucessfully Downloaded: ', filename)
        else:
            print('Image Couldn\'t be retreived')


def save_album(app, album_id):
    """Fetch album *album_id* through *app* and cache it in ``ALBUMS``.

    The last entry of the album's 'thumbnails' list is downloaded to
    ``./thumbnails/<album_id>.jpg`` unless that file already exists. The
    path is stored as 'thumbnail_filepath' even when the download did not
    succeed, so readers must cope with a missing file.
    """
    raw_album = app.get_album(album_id)
    ALBUMS[album_id] = {
        'name': raw_album['title'],
        'tracks': [
            {'title': song['title'], 'id': song['videoId'], 'track_num': index}
            for index, song in enumerate(raw_album['tracks'], start=1)
        ],
        'year': raw_album['year'],
    }

    thumbnail_url = raw_album['thumbnails'][-1]['url']
    filename = THUMBNAIL_DIRECTORY + album_id + '.jpg'
    # Only hit the network when the cover is not cached yet.
    if not os.path.isfile(filename):
        _download_thumbnail(thumbnail_url, filename)
    ALBUMS[album_id]['thumbnail_filepath'] = filename


def save_debug_data(filename, data, debug=True):
    """Write the string *data* to *filename* when *debug* is true."""
    if debug:
        with open(filename, 'w') as f:
            f.write(data)


def parse_songs(app, all_raw_song_data):
    """Turn raw library entries into the flat song dictionaries used here.

    Album data is looked up through *app* (see save_album) the first time an
    album id is seen. Entries without album information are skipped with a
    message, because album name, year and cover all come from the album.

    Returns:
        A list of dicts with the keys 'id', 'title', 'artists', 'album',
        'track', 'url', 'year' and 'thumbnail_filepath'. 'track' is ``None``
        when the song is not found in its album's track list.
    """
    all_parsed_songs = []
    for raw_song in all_raw_song_data:
        song_id = raw_song['videoId']
        album_info = raw_song.get('album')
        if not album_info or not album_info.get('id'):
            print("Skipping {}: no album information.".format(
                raw_song.get('title', song_id)))
            continue
        album_id = album_info['id']

        if album_id not in ALBUMS:
            save_album(app, album_id)
        album = ALBUMS[album_id]

        track_num = None
        for track in album['tracks']:
            if track['id'] == song_id:
                track_num = track['track_num']

        all_parsed_songs.append({
            'id': song_id,
            'title': raw_song['title'],
            'artists': [artist['name'] for artist in raw_song['artists']],
            'album': album['name'],
            'track': track_num,
            'url': YOUTUBE_MUSIC_URL.format(song_id),
            'year': album['year'],
            'thumbnail_filepath': album['thumbnail_filepath']
        })

    save_debug_data('parsed_album_data.json', json.dumps(ALBUMS), debug=DEBUG)
    save_debug_data('parsed_song_data.json', json.dumps(all_parsed_songs),
                    debug=DEBUG)
    return all_parsed_songs


def get_library_songs(app, song_limit, order='a_to_z'):
    """Fetch up to *song_limit* library songs through *app* and parse them.

    The raw response is dumped to ``raw_song_data.json`` when ``DEBUG`` is
    set. Returns the list produced by parse_songs().
    """
    all_raw_song_data = app.get_library_songs(limit=song_limit, order=order)
    save_debug_data('raw_song_data.json', json.dumps(all_raw_song_data),
                    debug=DEBUG)
    return parse_songs(app, all_raw_song_data)


def stuff():
    """Earlier download/tag/move pipeline; nothing in this file calls it.

    Reads the module globals ``music_directory`` and ``all_songs``, which
    only exist when the file is run as a script.

    NOTE(review): the logic is kept exactly as it was. It looks like
    work-in-progress code; see the notes below before reviving it.
    """
    # TODO: Can you use the cookies from YTMusic here
    # cookies = os.path.join(os.path.expanduser("~"), "cookies.txt")
    cookies = './cookies.txt'
    ytdl = create_downloader(music_directory, cookies)

    failed_songs = []
    for song in []:
        try:
            ytdl.extract_info(song['url'], download=True)
        except Exception as ex:
            print(ex)
            print("Could not download: {}".format((song)))
            failed_songs.append(song)

    failed_songs = []
    for song in all_songs:
        try:
            # NOTE(review): parse_songs() produces 'artists', not 'artist',
            # so this raises KeyError for songs coming from there.
            print("Downloading: {} - {} - {} from {}...".format(
                song['title'], song['artist'], song['album'], song['url']),
                end='')
            ytdl.extract_info(song['url'], download=True)
            # NOTE(review): ends the program after the first download;
            # presumably a debugging leftover.
            sys.exit()
            print("Done.")
        except Exception as ex:
            print(ex)
            print("Could not download: {}".format((song['url'])))
            failed_songs.append(song)

    files = get_all_files(music_directory)
    counter = 1
    for f in files:
        try:
            print("Adding metadata to {} ...".format(f), end="")
            # NOTE(review): create_downloader() has 'writeinfojson' commented
            # out, so these .info.json files are probably never written.
            with open(f.replace('.mp3', '.info.json')) as json_file:
                json_data = json.load(json_file)
            metadata = get_song_metadata_from_json(json_data, counter)
            write_metadata_to_song_file(f, metadata)
            print("Done")
            print("Moving file...", end="")
            move_file(f, metadata, music_directory)
            print("Done")
            counter += 1
        except Exception as e:
            # just gonna print this and move on to the next file.
            print(e)

    print("Cleaning up JSON files...", end='')
    cleanup_metadata_files(music_directory)
    print("Done.")


# TODO: Implement
def retry_downloading_songs():
    """Placeholder: prints a notice and exits the program."""
    # try:
    #     if len(failed_urls) > 1:
    #         print("Saving failed urls to txt file.")
    #         save_urls_from_playlist_to_file(
    #             os.path.join(music_directory, "jamos_failed_urls.txt"),
    #             failed_urls)
    #     elif args.retryFile:
    #         # Just because we don't have any failed urls in this run, doesn't
    #         # mean that we can get rid of the retry file. We'll only remove it
    #         # if it's been explicitly tried and we have no failed urls.
    #         # We've successfully downloaded all of the previously failed urls.
    #         # Delete the file
    #         os.remove(args.retryFile)
    # except Exception as ex:
    #     print(ex)
    #     print("Saving failed urls to file failed! Printing failed urls:")
    #     for url in failed_urls:
    #         print(url)
    print("Not Implemented Yet!")
    sys.exit()


# TODO: Implement
def handle_downloading_playlist():
    """Placeholder: prints a notice and exits the program."""
    print("Not Implemented Yet!")
    sys.exit()


def sanitize_for_filename(filename):
    """Return *filename* lower-cased and reduced to alphanumerics and '_'.

    Spaces become underscores; every other non-alphanumeric character is
    dropped.
    """
    underscored = filename.replace(' ', '_')
    kept = [char for char in underscored if char.isalnum() or char == '_']
    return ''.join(kept).lower()


def _download_and_file_song(ytdl, song, music_directory):
    """Download one parsed song, tag it and move it into the library tree.

    Raises:
        FileNotFoundError: when no downloaded file for the song id is found.
    """
    # download song
    ytdl.extract_info(song['url'], download=True)

    # get filename song was downloaded to; escape the fixed part so that
    # glob characters in the directory name are taken literally
    pattern = glob.escape(os.path.join(music_directory, song['id'])) + '.*'
    filenames = glob.glob(pattern)
    if not filenames:
        raise FileNotFoundError(
            "No downloaded file found for song id {}".format(song['id']))
    filename = filenames[0]

    # tag basic data
    tagged = music_tag.load_file(filename)
    tagged['name'] = song['title']
    tagged['artist'] = song['artists'][0]
    tagged['albumartist'] = song['artists'][0]
    tagged['album'] = song['album']
    tagged['tracknumber'] = song['track']
    tagged['year'] = song['year']

    # include album cover (best effort: the cover may not have downloaded)
    try:
        with open(song['thumbnail_filepath'], 'rb') as img_in:
            tagged['artwork'] = img_in.read()
    except Exception as ex:
        print(ex)

    # save music tag data
    tagged.save()

    # move song to music_directory/artist/album/artist_album_title.ext
    # excluding non filesafe characters
    artist_for_filename = sanitize_for_filename(
        song['artists'][0].replace('$', 's'))  # exception for $uicideboy$
    album_for_filename = sanitize_for_filename(song['album'])
    title_for_filename = sanitize_for_filename(song['title'])

    song_output_dir = os.path.join(music_directory, artist_for_filename,
                                   album_for_filename)
    Path(song_output_dir).mkdir(parents=True, exist_ok=True)
    new_filename = '{}_{}_{}.mp3'.format(artist_for_filename,
                                         album_for_filename,
                                         title_for_filename)
    shutil.move(filename, os.path.join(song_output_dir, new_filename))


if __name__ == "__main__":
    try:
        args = get_command_line_options()
    except ValueError:
        # The message and usage text were already printed.
        sys.exit(1)

    # NOTE(review): args.url and args.retryFile are validated above but not
    # used below; the whole library is always downloaded.
    music_directory = args.output or os.path.join(os.path.expanduser("~"),
                                                  "Music")
    cookies = args.cookies or os.path.join(os.path.expanduser("~"),
                                           "cookies.txt")

    app = create_youtube_music_api_object(args.headers)
    all_songs = get_library_songs(app, 5000)

    # From some testing, if your playlist is public, you don't have to use a
    # cookie file. Youtube-dl doesn't break or throw if the file doesn't exist.
    ytdl = create_downloader(music_directory, cookies)

    failed_songs = []
    for song in all_songs:
        try:
            _download_and_file_song(ytdl, song, music_directory)
        except Exception as ex:
            # One bad song must not abort the rest of the library.
            print(ex)
            print("Could not download: {}".format(song['url']))
            failed_songs.append(song)

    if failed_songs:
        print("{} song(s) could not be processed.".format(len(failed_songs)))
        sys.exit(1)