From 91e14e1cca64448b8e56c858e45e113f2ca9c6b3 Mon Sep 17 00:00:00 2001 From: Magnus Walbeck <mw@mwalbeck.org> Date: Wed, 28 Jun 2017 14:54:12 +0200 Subject: [PATCH 1/3] Initial rename functionality --- podfox/__init__.py | 92 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 86 insertions(+), 6 deletions(-) diff --git a/podfox/__init__.py b/podfox/__init__.py index 92d9f8f..1cdb1fe 100755 --- a/podfox/__init__.py +++ b/podfox/__init__.py @@ -28,6 +28,7 @@ import os import os.path import requests import sys +import re # RSS datetimes follow RFC 2822, same as email headers. # this is the chain of stackoverflow posts that led me to believe this is true. @@ -37,7 +38,7 @@ import sys # how-to-parse-a-rfc-2822-date-time-into-a-python-datetime from email.utils import parsedate -from time import mktime +from time import mktime, gmtime, strftime CONFIGURATION = {} @@ -188,28 +189,107 @@ def episodes_from_feed(d): return episodes +def rename_episode(folder, published, title, url): + if CONFIGURATION['date_format']: + date_format = CONFIGURATION['date_format'] + else: + date_format = "%Y-%m-%d" + published_date = strftime(date_format, gmtime(published)) + safe_title = escape_string(title) + extenstion = get_extenstion(url) + filename = published_date + " - " + safe_title + extenstion + + if not episode_exists(folder, filename): + return filename + + # If filename exists change title to original filename + original_title = get_original_filename(url) + filename = published_date + " - " + original_title + + if not episode_exists(folder, filename): + return filename + + # If filename exists change date to current and title to episode title + current_date = strftime("%Y-%m-%d", gmtime()) + filename = current_date + " - " + safe_title + extenstion + + if not episode_exists(folder, filename): + return filename + + # If filename exists change date to current and title to original filename + filename = current_date + " - " + original_title + + if not episode_exists(folder, filename): + return filename + + return add_epoch(original_title) + + +def escape_string(title): + pattern = r'[\|#:%&{}\\/<>*?$!\'"@]' + return re.sub(pattern, "_", title) + + +def get_extenstion(url): + url = url.split("?")[0] + pattern = r'[.][\w]+$' + return re.match(pattern, url) + + +def get_original_filename(url): + url = url.split("?")[0] + pattern = r'[^\/]+$' + return re.match(pattern, url) + + +def add_epoch(filename): + return gmtime() + " - " + filename + + +def episode_exists(shortname, filename): + base = CONFIGURATION['podcast-directory'] + if os.path.exists(os.path.join(base, shortname, filename)): + return True + + return False + + +def generic_episode_name(folder, url): + name = get_original_title(url) + + if not episode_exists(folder, name) + return name + + return add_epoch(name) + + def download_multiple(feed, maxnum): for episode in feed['episodes']: if maxnum == 0: break if not episode['downloaded']: - download_single(feed['shortname'], episode['url']) + if CONFIGURATION['rename_episodes']: + filename = rename_episode(feed['shortname'], episode['published'], + episode["title"], episode["url"]) + else: + filename = generic_episode_name(feed['shortname'], episode['url']) + download_single(feed['shortname'], episode['url'], filename) episode['downloaded'] = True maxnum -= 1 overwrite_config(feed) -def download_single(folder, url): +def download_single(folder, url, filename): print(url) base = CONFIGURATION['podcast-directory'] - filename = url.split('/')[-1] - filename = filename.split('?')[0] + if filename is None: + filename = get_original_filename(url) print_green("{:s} downloading".format(filename)) r = requests.get(url.strip(), stream=True) with open(os.path.join(base, folder, filename), 'wb') as f: for chunk in r.iter_content(chunk_size=1024**2): f.write(chunk) - print("done.") + print("done.") def available_feeds(): From 79aad1c842ec7695d9a14a9a0235ff79a150615d Mon Sep 17 00:00:00 2001 From: Magnus Walbeck <mw@mwalbeck.org> Date: Thu, 29 Jun 2017 14:00:29 +0200 Subject: [PATCH 2/3] Add option to exclude date from renamed file --- podfox/__init__.py | 57 +++++++++++++++++++++++++++++----------------- 1 file changed, 36 insertions(+), 21 deletions(-) diff --git a/podfox/__init__.py b/podfox/__init__.py index 1cdb1fe..bcb3768 100755 --- a/podfox/__init__.py +++ b/podfox/__init__.py @@ -194,35 +194,54 @@ def rename_episode(folder, published, title, url): date_format = CONFIGURATION['date_format'] else: date_format = "%Y-%m-%d" - published_date = strftime(date_format, gmtime(published)) - safe_title = escape_string(title) - extenstion = get_extenstion(url) - filename = published_date + " - " + safe_title + extenstion - if not episode_exists(folder, filename): + # Use published date and escaped title as filename + safe_title = escape_string(title) + get_extenstion(url) + + if date_format: + published_date = strftime(date_format, gmtime(published)) + else: + published_date = None + + filename = construct_filename(safe_title, published_date) + + if not file_exists(folder, filename): return filename # If filename exists change title to original filename original_title = get_original_filename(url) - filename = published_date + " - " + original_title - if not episode_exists(folder, filename): + filname = construct_filename(original_title, published_date) + + if not file_exists(folder, filename): return filename - # If filename exists change date to current and title to episode title - current_date = strftime("%Y-%m-%d", gmtime()) - filename = current_date + " - " + safe_title + extenstion + # If filename exists change date to current and title to escaped title + if date_format: + current_date = strftime(date_format, gmtime()) + else: + current_date = None + + filname = construct_filename(safe_title, current_date) - if not episode_exists(folder, filename): + if not file_exists(folder, filename): return filename # If filename exists change date to current and title to original filename - filename = current_date + " - " + original_title + filname = construct_filename(original_title, current_date) - if not episode_exists(folder, filename): + if not file_exists(folder, filename): return filename - return add_epoch(original_title) + # If filename exists change date to current epoch and original filename + return construct_filename(original_title, gmtime()) + + +def construct_filename(title, date=None): + if date is None: + return title + + return date + " - " + title def escape_string(title): @@ -242,11 +261,7 @@ def get_original_filename(url): return re.match(pattern, url) -def add_epoch(filename): - return gmtime() + " - " + filename - - -def episode_exists(shortname, filename): +def file_exists(shortname, filename): base = CONFIGURATION['podcast-directory'] if os.path.exists(os.path.join(base, shortname, filename)): return True @@ -257,10 +272,10 @@ def episode_exists(shortname, filename): def generic_episode_name(folder, url): name = get_original_title(url) - if not episode_exists(folder, name) + if not file_exists(folder, name) return name - return add_epoch(name) + return construct_filename(name, gmtime()) def download_multiple(feed, maxnum): From 45dd080910fc2c8ab99a7b592270a4348c3e67a4 Mon Sep 17 00:00:00 2001 From: Magnus Walbeck <mw@mwalbeck.org> Date: Thu, 29 Jun 2017 16:13:22 +0200 Subject: [PATCH 3/3] Fixes and improvements to rename code --- podfox/__init__.py | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/podfox/__init__.py b/podfox/__init__.py index bcb3768..cdae0cb 100755 --- a/podfox/__init__.py +++ b/podfox/__init__.py @@ -38,7 +38,7 @@ import re # how-to-parse-a-rfc-2822-date-time-into-a-python-datetime from email.utils import parsedate -from time import mktime, gmtime, strftime +from time import time, mktime, gmtime, strftime CONFIGURATION = {} @@ -190,7 +190,7 @@ def episodes_from_feed(d): def rename_episode(folder, published, title, url): - if CONFIGURATION['date_format']: + if 'date_format' in CONFIGURATION: date_format = CONFIGURATION['date_format'] else: date_format = "%Y-%m-%d" @@ -210,8 +210,7 @@ def rename_episode(folder, published, title, url): # If filename exists change title to original filename original_title = get_original_filename(url) - - filname = construct_filename(original_title, published_date) + filename = construct_filename(original_title, published_date) if not file_exists(folder, filename): return filename @@ -222,26 +221,26 @@ def rename_episode(folder, published, title, url): else: current_date = None - filname = construct_filename(safe_title, current_date) + filename = construct_filename(safe_title, current_date) if not file_exists(folder, filename): return filename # If filename exists change date to current and title to original filename - filname = construct_filename(original_title, current_date) + filename = construct_filename(original_title, current_date) if not file_exists(folder, filename): return filename # If filename exists change date to current epoch and original filename - return construct_filename(original_title, gmtime()) + return construct_filename(original_title, int(time())) def construct_filename(title, date=None): if date is None: return title - return date + " - " + title + return "{} - {}".format(date, title) def escape_string(title): @@ -252,13 +251,13 @@ def escape_string(title): def get_extenstion(url): url = url.split("?")[0] pattern = r'[.][\w]+$' - return re.match(pattern, url) + return re.search(pattern, url).group(0) def get_original_filename(url): url = url.split("?")[0] pattern = r'[^\/]+$' - return re.match(pattern, url) + return re.search(pattern, url).group(0) def file_exists(shortname, filename): @@ -270,12 +269,12 @@ def file_exists(shortname, filename): def generic_episode_name(folder, url): - name = get_original_title(url) + filename = get_original_filename(url) - if not file_exists(folder, name) - return name + if not file_exists(folder, filename): + return filename - return construct_filename(name, gmtime()) + return construct_filename(filename, int(time())) def download_multiple(feed, maxnum): @@ -283,7 +282,7 @@ def download_multiple(feed, maxnum): if maxnum == 0: break if not episode['downloaded']: - if CONFIGURATION['rename_episodes']: + if 'rename_episodes' in CONFIGURATION and CONFIGURATION['rename_episodes']: filename = rename_episode(feed['shortname'], episode['published'], episode["title"], episode["url"]) else: