import os
import sys
import operator
from collections import Counter
from eyed3 import id3, mp3
from eyed3.core import AUDIO_MP3
from eyed3.mimetype import guessMimetype
from eyed3.utils.console import Fore, Style, printMsg
from eyed3.plugins import LoaderPlugin
from eyed3.id3 import frames
# Every ID3 tag version tallied by the statistics; Id3VersionCounter seeds one
# zero-valued counter per entry so each version shows up in its report.
ID3_VERSIONS = [id3.ID3_V1_0, id3.ID3_V1_1,
id3.ID3_V2_2, id3.ID3_V2_3, id3.ID3_V2_4]
# Printable form of each comparison operator.  BitrateCounter combines these
# with a bit rate to build the display names of its (operator, bitrate) bucket
# keys.  Every value is two characters wide (note the trailing spaces).
_OP_STRINGS = {operator.le: "<=",
operator.lt: "< ",
operator.ge: ">=",
operator.gt: "> ",
operator.eq: "= ",
operator.ne: "!=",
}
class Rule:
    """Abstract base for the scoring rules applied to each scanned file."""

    def test(self, path, audio_file):
        """Evaluate this rule for a single file.

        Subclasses must override this method; the base implementation always
        raises.  The overrides in this module return either ``None`` or a
        list of ``(score, text)`` tuples.

        :param path: Path of the file being examined.
        :param audio_file: The loaded audio file object, or a falsy value
            when the file was not loaded as audio.
        :raises NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError()
# Tag versions that Id3TagRules accepts without a deduction; any other
# version costs the file points.
PREFERRED_ID3_VERSIONS = [id3.ID3_V2_3,
id3.ID3_V2_4,
]
class Id3TagRules(Rule):
    """Rule that scores how complete a file's ID3 tag is."""

    def test(self, path, audio_file):
        """Return the deductions earned by the tag of ``audio_file``.

        :param path: Path of the file (unused by this rule).
        :param audio_file: Loaded audio file, or ``None``.
        :returns: ``None`` when ``audio_file`` is ``None``; a single
            ``(-75, ...)`` entry when the file has no tag; otherwise a
            (possibly empty) list of ``(score, text)`` deductions.
        """
        if audio_file is None:
            return None

        tag = audio_file.tag
        if not tag:
            return [(-75, "Missing ID3 tag")]

        deductions = []

        if tag.version not in PREFERRED_ID3_VERSIONS:
            deductions.append((-30, "ID3 version not in %s" %
                               PREFERRED_ID3_VERSIONS))

        # (value, penalty, message) -- a falsy value earns the penalty.
        basic_fields = (
            (tag.title, -30, "Tag missing title"),
            (tag.artist, -28, "Tag missing artist"),
            (tag.album, -26, "Tag missing album"),
            (tag.track_num[0], -24, "Tag missing track number"),
            (tag.track_num[1], -22, "Tag missing total # of tracks"),
        )
        for value, penalty, message in basic_fields:
            if not value:
                deductions.append((penalty, message))

        if tag.getBestDate():
            if not tag.original_release_date:
                # Original release date is so rarely used but is almost
                # always what I mean or wanna know.
                deductions.append((-10, "No original release date"))
            elif not tag.release_date:
                deductions.append((-5, "No release date"))
        else:
            deductions.append((-30, "Tag missing any useful dates"))

        # TLEN, best gotten from audio_file.info.time_secs but having it in
        # the tag is good, I guess.
        if b"TLEN" not in tag.frame_set:
            deductions.append((-5, "No TLEN frame"))

        return deductions
class BitrateRule(Rule):
    """Rule that penalises audio encoded at a low bit rate."""

    # (threshold, deduction) pairs, checked in order; only the first
    # threshold the bit rate falls below is applied.
    BITRATE_DEDUCTIONS = [(128, -20), (192, -10)]

    def test(self, path, audio_file):
        """Return the bit rate deduction for ``audio_file``.

        :param path: Path of the file (unused by this rule).
        :param audio_file: Loaded audio file, or a falsy value.
        :returns: ``None`` when there is no audio file; a single
            ``(-90, ...)`` entry when the file has no ``info``; otherwise a
            list holding at most one ``(score, text)`` deduction.
        """
        if not audio_file:
            return None

        if not audio_file.info:
            # Detected as an audio file but not real audio data found.
            return [(-90, "No audio data found")]

        _is_vbr, bitrate = audio_file.info.bit_rate
        for limit, penalty in self.BITRATE_DEDUCTIONS:
            if bitrate < limit:
                return [(penalty, "Bit rate < %d" % limit)]
        return []
# Mime-types FileRule accepts without a deduction: the MP3 types plus the
# listed image formats.
VALID_MIME_TYPES = mp3.MIME_TYPES + ["image/png",
"image/gif",
"image/jpeg",
]
class FileRule(Rule):
    """Rule that rejects hidden files and unsupported file types."""

    def test(self, path, audio_file):
        """Return a deduction when ``path`` is hidden or of an unwanted type.

        A path counts as hidden when any of its components starts with a
        dot.  The special components ``.`` and ``..`` are ignored, so a
        relative path such as ``./song.mp3`` is not mistaken for a hidden
        file.

        :param path: Path of the file being examined.
        :param audio_file: Unused by this rule.
        :returns: A single ``(-100, text)`` entry for a hidden or
            unsupported file, otherwise ``None``.
        """
        mt = guessMimetype(path)

        # os.path.split() only yields (head, tail); normalise and split on
        # the separator instead so that every component is inspected.
        components = os.path.normpath(path).split(os.sep)
        for name in components:
            if name in (os.curdir, os.pardir):
                continue
            if name.startswith('.'):
                return [(-100, "Hidden file type")]

        if mt not in VALID_MIME_TYPES:
            return [(-100, "Unsupported file type: %s" % mt)]
        return None
# Base names (without extension) that ArtworkRule accepts for image files.
VALID_ARTWORK_NAMES = ("cover", "cover-front", "cover-back")
class ArtworkRule(Rule):
    """Rule that penalises image files with an unexpected base name."""

    def test(self, path, audio_file):
        """Return a deduction for a badly named artwork file.

        :param path: Path of the file being examined.
        :param audio_file: Unused by this rule.
        :returns: ``[(-10, text)]`` when ``path`` has an ``image/*``
            mime-type and its base name (extension removed) is not listed in
            ``VALID_ARTWORK_NAMES``; otherwise ``None``.
        """
        mime_type = guessMimetype(path)
        if not mime_type or not mime_type.startswith("image/"):
            return None

        stem = os.path.splitext(os.path.basename(path))[0]
        if stem in VALID_ARTWORK_NAMES:
            return None

        return [(-10, "Artwork file not in %s" % str(VALID_ARTWORK_NAMES))]
# Frame IDs that Id3FrameRules penalises whenever they appear in a tag.
BAD_FRAMES = [frames.PRIVATE_FID, frames.OBJECT_FID]
class Id3FrameRules(Rule):
    """Rule that penalises duplicated text frames and unwanted frames."""

    def test(self, path, audio_file):
        """Return deductions for problematic frames in the file's tag.

        Frame IDs are handled as ``bytes`` (they are decoded as ASCII for
        the messages), so the text-frame check compares against bytes
        literals.  Indexing a bytes object yields an ``int``, which would
        never equal the string ``'T'``.

        :param path: Path of the file (unused by this rule).
        :param audio_file: Loaded audio file, or a falsy value.
        :returns: ``None`` when there is no audio file or no tag; otherwise
            a (possibly empty) list of ``(score, text)`` deductions.
        """
        if not audio_file or not audio_file.tag:
            return None

        frame_set = audio_file.tag.frame_set
        scores = []
        for fid in frame_set:
            is_text_frame = fid.startswith(b"T") and fid != b"TXXX"
            if is_text_frame and len(frame_set[fid]) > 1:
                # TXXX (user text) is excluded from the duplicate check.
                scores.append((-10,
                               "Multiple %s frames" % fid.decode('ascii')))
            elif fid in BAD_FRAMES:
                scores.append((-13, "%s frames are bad, mmmkay?" %
                               fid.decode('ascii')))
        return scores
class Stat(Counter):
    """Counter with a running total and a tabular console report.

    Subclasses override :meth:`_compute` to update their counters and
    :meth:`_report` to print a heading before delegating to the base
    report.  ``_key_names`` optionally maps a counter key to the name shown
    for it in the report.
    """

    TOTAL = "total"

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # The total always starts at zero, whatever was passed to Counter.
        self[self.TOTAL] = 0
        self._key_names = {}

    def compute(self, file, audio_file):
        """Count one more file and let the subclass update its counters.

        :param file: Path of the file being processed.
        :param audio_file: Loaded audio file object, or a falsy value.
        """
        self[self.TOTAL] += 1
        self._compute(file, audio_file)

    def _compute(self, file, audio_file):
        """Hook for subclasses; the base implementation does nothing."""
        pass

    def report(self):
        """Print this statistic to standard output."""
        self._report()

    def _displayName(self, key):
        """Return the report name registered for ``key``, else ``key``."""
        return self._key_names.get(key, key)

    def _sortedKeys(self, most_common=False):
        """Return the counter keys in report order, ``TOTAL`` last.

        :param most_common: When false, keys are ordered by display name
            and keys with a falsy display name are dropped.  When true,
            keys with a positive count come first in ``most_common()``
            order, followed by the remaining keys ordered by display name.
        :returns: List of counter keys (not display names).
        """
        key_map = {self._displayName(k): k for k in self}
        total = self.TOTAL

        if not most_common:
            ordered_names = sorted(name for name in key_map
                                   if name and name != total)
        else:
            ordered_names = []
            leftovers = []
            for key, count in self.most_common():
                if key == total:
                    continue
                bucket = ordered_names if count > 0 else leftovers
                bucket.append(self._displayName(key))
            leftovers.sort()
            ordered_names.extend(leftovers)

        ordered_names.append(total)
        return [key_map[name] for name in ordered_names]

    def _report(self, most_common=False):
        """Print one aligned ``name: value ( percent )`` row per key.

        :param most_common: Passed through to :meth:`_sortedKeys`.
        """
        keys = self._sortedKeys(most_common=most_common)

        # Measure with the real key: looking the display name up in the
        # counter would miss and report a width of 1 for renamed keys.
        rows = [(str(self._displayName(k)), k, self[k]) for k in keys]
        key_col_width = max((len(name) for name, _, _ in rows), default=0) + 1
        val_col_width = max((len(str(v)) for _, _, v in rows), default=0) + 1

        for name, key, value in rows:
            percent = self.percent(key) if value and key != "total" else ""
            percent_text = (" ( %s%.2f%%%s )" % (Fore.GREEN, percent,
                                                 Fore.RESET)
                            if percent else "")
            print("{padding}{key}:{value}{percent}".format(
                padding=' ' * 4,
                key=name.ljust(key_col_width),
                value=str(value).rjust(val_col_width),
                percent=percent_text,
            ))

    def percent(self, key):
        """Return the count of ``key`` as a percentage of the total.

        :returns: A float; ``0.0`` when the total is zero, so callers never
            hit a division by zero.
        """
        total = self["total"]
        if not total:
            return 0.0
        return (float(self[key]) / float(total)) * 100
class AudioStat(Stat):
    """Statistic computed from loaded audio files only.

    Unlike the base class, ``compute`` and ``_compute`` take just the audio
    file object and no path.
    """

    def compute(self, audio_file):
        """Count one more audio file and update the subclass counters.

        :param audio_file: Loaded audio file object; must be truthy.
        """
        assert audio_file
        self["total"] += 1
        self._compute(audio_file)

    def _compute(self, audio_file):
        """Hook for subclasses; the base implementation does nothing."""
        pass
class FileCounterStat(Stat):
    """Counts scanned files by broad category."""

    SUPPORTED_AUDIO = "audio"
    UNSUPPORTED_AUDIO = "audio (unsupported)"
    HIDDEN_FILES = "hidden"
    OTHER_FILES = "other"

    def __init__(self):
        super().__init__()
        # Pre-seed these categories so they are reported even when zero.
        # OTHER_FILES is not seeded, so it only appears once counted.
        for category in ("audio", "hidden", "audio (unsupported)"):
            self[category] = 0

    def _compute(self, file, audio_file):
        """Increment the counter of the first category the file matches."""
        mime_type = guessMimetype(file)

        if audio_file:
            category = self.SUPPORTED_AUDIO
        elif mime_type and mime_type.startswith("audio/"):
            category = self.UNSUPPORTED_AUDIO
        elif os.path.basename(file).startswith('.'):
            category = self.HIDDEN_FILES
        else:
            category = self.OTHER_FILES

        self[category] += 1

    def _report(self):
        """Print the heading followed by the name-ordered table."""
        heading = Style.BRIGHT + Fore.YELLOW + "Files:" + Style.RESET_ALL
        print(heading)
        super()._report()
class MimeTypeStat(Stat):
    """Counts scanned files by guessed mime-type."""

    def _compute(self, file, audio_file):
        """Increment the counter keyed by the mime-type guessed for ``file``."""
        self[guessMimetype(file)] += 1

    def _report(self):
        """Print the heading followed by the table, most common first."""
        heading = Style.BRIGHT + Fore.YELLOW + "Mime-Types:" + Style.RESET_ALL
        print(heading)
        super()._report(most_common=True)
class Id3VersionCounter(AudioStat):
    """Counts audio files by the ID3 version of their tag."""

    def __init__(self):
        super().__init__()
        # Seed every known version at zero and register its printable name.
        for version in ID3_VERSIONS:
            self[version] = 0
            self._key_names[version] = id3.versionToString(version)

    def _compute(self, audio_file):
        """Count the tag's version; untagged files are counted under ``None``."""
        tag = audio_file.tag
        key = tag.version if tag else None
        self[key] += 1

    def _report(self):
        """Print the heading followed by the name-ordered table."""
        heading = Style.BRIGHT + Fore.YELLOW + "ID3 versions:" + Style.RESET_ALL
        print(heading)
        super()._report()
class Id3FrameCounter(AudioStat):
    """Counts how many frames of each frame ID were seen across all tags."""

    def _compute(self, audio_file):
        """Add the number of frames stored under each frame ID of the tag."""
        tag = audio_file.tag
        if not tag:
            return
        frame_set = tag.frame_set
        for frame_id in frame_set:
            self[frame_id] += len(frame_set[frame_id])

    def _report(self):
        """Print the heading followed by the table, most common first."""
        heading = Style.BRIGHT + Fore.YELLOW + "ID3 frames:" + Style.RESET_ALL
        print(heading)
        super()._report(most_common=True)
class BitrateCounter(AudioStat):
    """Counts MP3 files by CBR/VBR encoding and by bit rate bucket."""

    def __init__(self):
        super().__init__()
        self["cbr"] = 0
        self["vbr"] = 0

        # Buckets are (comparison, bit rate) pairs tested in order; a file
        # lands in the first bucket whose comparison holds.
        upper_bounds = (96, 112, 128, 160, 192, 256, 320)
        self.bitrate_keys = [(operator.le, bound) for bound in upper_bounds]
        self.bitrate_keys.append((operator.gt, 320))

        for bucket in self.bitrate_keys:
            compare, bound = bucket
            self[bucket] = 0
            self._key_names[bucket] = "%s %d" % (_OP_STRINGS[compare], bound)

    def _compute(self, audio_file):
        """Tally one MP3 file; other files are removed from the total."""
        if audio_file.type != AUDIO_MP3 or audio_file.info is None:
            # Undo the increment already applied by compute().
            self["total"] -= 1
            return

        is_vbr, rate = audio_file.info.bit_rate
        self["vbr" if is_vbr else "cbr"] += 1

        for bucket in self.bitrate_keys:
            compare, bound = bucket
            if compare(rate, bound):
                self[bucket] += 1
                break

    def _report(self):
        """Print the heading followed by the table, most common first."""
        heading = Style.BRIGHT + Fore.YELLOW + "MP3 bitrates:" + Style.RESET_ALL
        print(heading)
        super()._report(most_common=True)

    def _sortedKeys(self, most_common=False):
        """Return the base ordering with "cbr" and "vbr" moved to the front."""
        ordered = super()._sortedKeys(most_common=most_common)
        pinned = ["cbr", "vbr"]
        for name in pinned:
            ordered.remove(name)
        return pinned + ordered
class RuleViolationStat(Stat):
    """Counter of rule violations, keyed by the violation text."""

    def _report(self):
        """Print the heading followed by the table, most common first."""
        heading = (Style.BRIGHT + Fore.YELLOW + "Rule Violations:" +
                   Style.RESET_ALL)
        print(heading)
        super()._report(most_common=True)
class Id3ImageTypeCounter(AudioStat):
    """Counts the images embedded in tags by picture type."""

    def __init__(self):
        super().__init__()
        self._key_names = {}
        # Every integer attribute of ImageFrame whose name does not end in
        # "_TYPE" is treated as a picture type: it is seeded at zero and
        # reported under the attribute's name.
        for attr_name in dir(frames.ImageFrame):
            attr_value = getattr(frames.ImageFrame, attr_name)
            if attr_name.endswith("_TYPE") or not isinstance(attr_value, int):
                continue
            self._key_names[attr_value] = attr_name
            self[attr_value] = 0

    def _compute(self, audio_file):
        """Count the picture type of every image in the file's tag."""
        tag = audio_file.tag
        if not tag:
            return
        for image in tag.images:
            self[image.picture_type] += 1

    def _report(self):
        """Print the heading followed by the name-ordered table."""
        heading = (Style.BRIGHT + Fore.YELLOW + "APIC image types:" +
                   Style.RESET_ALL)
        print(heading)
        super()._report()
class StatisticsPlugin(LoaderPlugin):
    """Loader plugin that gathers statistics and a quality score.

    Every handled file is fed to a set of ``Stat`` counters and scored
    against a set of ``Rule`` objects.  A file starts at 100 points and
    each rule violation adds its (negative) score.
    """

    NAMES = ['stats']
    SUMMARY = "Computes statistics for all audio files scanned."

    def __init__(self, arg_parser):
        super(StatisticsPlugin, self).__init__(arg_parser)

        self.arg_group.add_argument(
            "--verbose", action="store_true", default=False,
            help="Show details for each file with rule violations.")

        # Reported in this order, followed by the rule violation counter.
        self._stats = [
            FileCounterStat(),
            MimeTypeStat(),
            Id3VersionCounter(),
            Id3FrameCounter(),
            Id3ImageTypeCounter(),
            BitrateCounter(),
        ]
        self._rules_stat = RuleViolationStat()

        self._score_sum = 0
        self._score_count = 0
        # Maps a path to the list of (score, text) violations found for it.
        self._rules_log = {}
        self._rules = [Id3TagRules(),
                       FileRule(),
                       ArtworkRule(),
                       BitrateRule(),
                       Id3FrameRules(),
                       ]

    def handleFile(self, path):
        """Update every statistic and the running score for ``path``.

        :param path: Path of the file to process.
        """
        # NOTE(review): the base class is presumably what sets
        # self.audio_file for this path -- confirm against LoaderPlugin.
        super(StatisticsPlugin, self).handleFile(path)
        if not self.args.quiet:
            # Progress indicator: one dot per file.
            sys.stdout.write('.')
            sys.stdout.flush()

        for stat in self._stats:
            if isinstance(stat, AudioStat):
                # Audio statistics only see files that loaded as audio.
                if self.audio_file:
                    stat.compute(self.audio_file)
            else:
                stat.compute(path, self.audio_file)

        self._score_count += 1
        total_score = 100
        for rule in self._rules:
            scores = rule.test(path, self.audio_file) or []
            if not scores:
                continue

            violations = self._rules_log.setdefault(path, [])
            for score, text in scores:
                self._rules_stat[text] += 1
                violations.append((score, text))
                # += because negative values are returned
                total_score += score

        if total_score != 100:
            self._rules_stat[Stat.TOTAL] += 1

        self._score_sum += total_score

    def handleDone(self):
        """Print every statistic, optional per-file details and the score."""
        # NOTE(review): _num_loaded is not set in this class; presumably
        # LoaderPlugin maintains it -- confirm.
        if self._num_loaded == 0:
            super(StatisticsPlugin, self).handleDone()
            return

        print()
        for stat in self._stats + [self._rules_stat]:
            stat.report()
            print()

        # Detailed rule violations
        if self.args.verbose:
            for path, violations in self._rules_log.items():
                printMsg(path)  # does the right thing for unicode
                for score, text in violations:
                    print(f"\t{Fore.RED}{str(score).center(3)}{Fore.RESET} "
                          f"({text})")

        # Average score over every handled file, coloured by quality band.
        average = float(self._score_sum) / float(self._score_count)
        if average > 80:
            color = Fore.GREEN
        elif average > 70:
            color = Fore.YELLOW
        else:
            color = Fore.RED

        # "%%" is not an escape inside an f-string; a single "%" is needed.
        print(f"{Style.BRIGHT}Score{Style.RESET_BRIGHT} = "
              f"{color}{average:.2f}%{Fore.RESET}")
        if not self.args.verbose:
            print("Run with --verbose to see files and their rule violations")
        print()