Compare commits
2 Commits
748ba97d94
...
0ea7db4dc7
Author | SHA1 | Date | |
---|---|---|---|
0ea7db4dc7 | |||
4c2d1043fc |
|
@ -1 +1,3 @@
|
||||||
A lyrics-fetching tool written in Python, designed similarly to a package manager. It manages indexing itself, so sources don't need to do that.
|
A lyrics-fetching tool written in Python, designed similarly to a package manager. It manages indexing itself, so sources don't need to do that.
|
||||||
|
Offers a similarity checker for matching strings, if sources do not wish to implement their own.
|
||||||
|
Most paths in config.json, if relative, will be relative to the base "lyrics" path. If that path is relative, it's relative to the current working directory. Please do not make that path relative.
|
37
getlyrics.py
37
getlyrics.py
|
@ -1,16 +1,15 @@
|
||||||
import requests as r
|
from sys import argv
|
||||||
import sys
|
from os import listdir as ls, chdir as cd, path
|
||||||
import os
|
|
||||||
from os import path
|
|
||||||
import json as j
|
import json as j
|
||||||
|
import utils as u
|
||||||
|
|
||||||
getfile=lambda x: path.join(path.dirname(__file__),x)
|
tracing=True
|
||||||
with open(getfile('config.json')) as b: conf=j.load(b)
|
u.tracing=tracing
|
||||||
|
|
||||||
|
with open(u.get_file('config.json')) as b: conf=j.load(b)
|
||||||
import importlib as il
|
import importlib as il
|
||||||
sources=[il.import_module(f'sources.{n[:-3]}') for n in os.listdir(getfile('sources')) if n.endswith('.py')]
|
sources=[il.import_module(f'sources.{n[:-3]}') for n in ls(u.get_file('sources')) if n.endswith('.py')]
|
||||||
sources=list(filter(lambda x:x.enabled,sources))
|
sources=list(filter(lambda x:x.enabled,sources))
|
||||||
class FlashyNotMountedError(Exception):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def init(args):
|
def init(args):
|
||||||
#Init
|
#Init
|
||||||
|
@ -22,11 +21,12 @@ def init(args):
|
||||||
def local(band,song):
|
def local(band,song):
|
||||||
#Local
|
#Local
|
||||||
#All lyrics that are fetched from remote will automatically get saved in here for later reference
|
#All lyrics that are fetched from remote will automatically get saved in here for later reference
|
||||||
#Also, any time the existence of a song on a remote is proven (eg. the remote offers an album list for a band, which in turn offers a track list, and this needs to be parsed to get the correct URL), that will be indexed locally for future reference, in ./.indices
|
#Also, any time the existence of a song on a remote is proven (eg. the remote offers an album list for a band, which in turn offers a track list, and this needs to be parsed to get the correct URL), that will be indexed locally for future reference, in the indexcache config path.
|
||||||
#Deleting that folder will reset that, and should always be safe.
|
#Deleting that folder will reset that, and should always be safe.
|
||||||
#Indices will be one file per source, so deleting individual files will be part of uninstalling a source.
|
#Indices will be one file per source, so deleting individual files will be part of uninstalling a source.
|
||||||
|
#path.isabs(smth) - use as a switch for whether to join the path
|
||||||
try:
|
try:
|
||||||
os.chdir(conf['lyrics'])
|
cd(conf['lyrics'])
|
||||||
prep=lambda x:x.lower().replace(' ','_')
|
prep=lambda x:x.lower().replace(' ','_')
|
||||||
try:
|
try:
|
||||||
with open(f'{prep(song)}_{prep(band)}.lyric') as b:
|
with open(f'{prep(song)}_{prep(band)}.lyric') as b:
|
||||||
|
@ -34,7 +34,7 @@ def local(band,song):
|
||||||
except FileNotFoundError: lyrics=''
|
except FileNotFoundError: lyrics=''
|
||||||
return lyrics
|
return lyrics
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
raise FlashyNotMountedError
|
raise u.LyricsPathUnavailable()
|
||||||
|
|
||||||
def remote(band,song):
|
def remote(band,song):
|
||||||
#Runs through sources and tries to locate the song and fetch lyrics.
|
#Runs through sources and tries to locate the song and fetch lyrics.
|
||||||
|
@ -52,12 +52,15 @@ def remote(band,song):
|
||||||
return lyrics
|
return lyrics
|
||||||
|
|
||||||
if __name__=='__main__':
|
if __name__=='__main__':
|
||||||
band,song=init(sys.argv[1:])
|
band,song=init(argv[1:])
|
||||||
try: lyrics=local(band,song)
|
try: lyrics=local(band,song)
|
||||||
except FlashyNotMountedError:
|
except u.LyricsPathUnavailable:
|
||||||
if input("Flashy not mounted, proceed? (y/n)")!='y': exit()
|
if input("Local lyrics folder unavailable, proceed with remote lookup? (y/n)")!='y': exit()
|
||||||
lyrics=''
|
lyrics=''
|
||||||
if not lyrics:
|
if not lyrics:
|
||||||
print("Lyrics not found locally, searching for remote...")
|
print("Lyrics not found locally, searching for remote...")
|
||||||
lyrics=remote(band,song) or ''
|
try: lyrics=remote(band,song) or ''
|
||||||
if lyrics: print(lyrics)
|
except u.NotFound as e:
|
||||||
|
missings={'song':'Couldn\'t find that song.','lyrics':'No lyrics for that song.'}
|
||||||
|
print(missings[str(e)])
|
||||||
|
if lyrics: print(lyrics)
|
|
@ -2,7 +2,7 @@ Not actually markdown, haha tricked ya!
|
||||||
|
|
||||||
Documentation for files in this folder.
|
Documentation for files in this folder.
|
||||||
|
|
||||||
1) Every file must be a python script
|
1) For a file to be treated as a source, it must be a python script (.py). Other files are ignored.
|
||||||
2) It must be named with the domain it serves as the filename (starting from TLD and narrowing down; dots and slashes should be converted to underscores - the fetcher won't actually try to restore it and load the URL).
|
2) It must be named with the domain it serves as the filename (starting from TLD and narrowing down; dots and slashes should be converted to underscores - the fetcher won't actually try to restore it and load the URL).
|
||||||
3) It must expose an "enabled" global boolean that, if False, will cause the fetcher to ignore its existence entirely. This is useful for sources that are temporarily not working, or for sources that are pending proper updates to the source's changes, etc.
|
3) It must expose an "enabled" global boolean that, if False, will cause the fetcher to ignore its existence entirely. This is useful for sources that are temporarily not working, or for sources that are pending proper updates to the source's changes, etc.
|
||||||
4) It must expose a lyrics(song,band='',album='') function that returns song lyrics as plaintext (str), or as arranged lyrics (dict\[sections (list),arrangement (list)\]). It's recommended that if neither band nor album are given, the function should automatically return a failure, or explicitly recommend a search, rather than attempting to work with it.
|
4) It must expose a lyrics(song,band='',album='') function that returns song lyrics as plaintext (str), or as arranged lyrics (dict\[sections (list),arrangement (list)\]). It's recommended that if neither band nor album are given, the function should automatically return a failure, or explicitly recommend a search, rather than attempting to work with it.
|
||||||
|
@ -12,3 +12,4 @@ Documentation for files in this folder.
|
||||||
- massfetch(band,album=''), which should fetch lyrics to all tracks of the band (on that album) and return them in a list
|
- massfetch(band,album=''), which should fetch lyrics to all tracks of the band (on that album) and return them in a list
|
||||||
- This will, at some point, become a definitive list of optional functions that the fetcher supports; it will always accept recommendations for change.
|
- This will, at some point, become a definitive list of optional functions that the fetcher supports; it will always accept recommendations for change.
|
||||||
6) Input formats: All names will be passed in exactly as the user presents the information; it is the source's job to wrangle them correctly into URLs. For example, "wither." is an album name that may be given to the source. The source should generally assume that words will be separated with spaces, but there's no guarantee. It may also occasionally receive a band name and a song name swapped with each other; it isn't expected to figure that out.
|
6) Input formats: All names will be passed in exactly as the user presents the information; it is the source's job to wrangle them correctly into URLs. For example, "wither." is an album name that may be given to the source. The source should generally assume that words will be separated with spaces, but there's no guarantee. It may also occasionally receive a band name and a song name swapped with each other; it isn't expected to figure that out.
|
||||||
|
7) Output: If a function obtains more, or different, data than was expected (e.g. lyrics are available on an album's track list), it is welcome to raise a utils.AdditionalData containing a nested dictionary with the data actually received. The only valid keys are "name", "contents", "metadata" and "type". "type" must be one of "band", "album" or "track". "metadata" will later allow other keys (such as "url", "duration", "number", "year", "artist" - for guest tracks, etc.). "contents" may be a dictionary which obeys the same rules. A function is also welcome to raise a utils.NotFound with a message of "song", "lyrics", "band" or "album" to indicate a failure. The distinction between "song" and "lyrics" is that "song" means the song itself could not be found, while "lyrics" means the song appears to exist but no lyrics are present (not provided yet, or instrumental).
|
|
@ -1,34 +1,7 @@
|
||||||
enabled=True
|
enabled=True
|
||||||
|
|
||||||
import requests as r
|
import requests as r
|
||||||
|
import utils as u
|
||||||
def processneedle(gunk,start,needle):
    """Extract the text between two delimiters, searching from ``start``.

    Args:
        gunk: The text to search.
        start: Index in ``gunk`` at which the search begins.
        needle: Sequence whose first two items are the opening and the
            closing delimiter. Any further items are ignored here.

    Returns:
        ``(text, next_start)``, where ``text`` is what lies between the
        delimiters and ``next_start`` is the index just past the closing
        delimiter, or ``(None, None)`` when either delimiter is missing.
    """
    opener,closer=needle[0],needle[1]
    # str.find with an offset avoids copying the tail of gunk on every call.
    found=gunk.find(opener,start)
    if found==-1:
        print('failed needle:',needle)
        return None,None
    start=found+len(opener)
    # The closing delimiter is looked for one character past the opener,
    # so the extracted text is never empty.
    end=gunk.find(closer,start+1)
    if end==-1:
        # A missing closing delimiter used to escape as an uncaught
        # ValueError; report it the same way as a missing opening one.
        print('failed needle:',needle)
        return None,None
    return gunk[start:end],end+len(closer)
|
|
||||||
|
|
||||||
def stringiter(gunk:str,needle:tuple=('',''),needles:list=None):
    """Generator that repeatedly extracts delimited text from ``gunk``.

    Two modes are supported:

    * ``needles`` given (non-interactive): every pass applies each needle
      in order, continuing from where the previous match ended, and yields
      one dict per pass. A needle with a third item stores its match under
      that item as the key; a two-item needle appends its match to the
      list stored under the key ``''``.
    * ``needles`` empty (interactive): the single ``needle`` is applied
      repeatedly and each match is yielded as a string. If ``needle`` is
      left at its default ``('','')``, the generator first yields ``None``
      and takes the needle from the value passed to ``send()``.

    In both modes the generator finishes as soon as ``processneedle``
    reports a failed match.

    Args:
        gunk: The text to search.
        needle: ``(opening, closing[, key])`` delimiters for interactive mode.
        needles: List of such tuples for non-interactive mode.
    """
    start=0
    if needles:
        # Non-interactive mode. You have all the info upfront.
        while True:
            out={}
            for spec in needles:
                res,start=processneedle(gunk,start,spec)
                # processneedle signals failure with (None, None).
                if res is None: return
                if len(spec)>2:
                    out[spec[2]]=res
                else:
                    # Unnamed needles accumulate under the empty-string key.
                    out.setdefault('',[]).append(res)
            yield out
    # Interactive mode. I guess don't use this as much.
    if needle==('',''): needle=yield None
    while True:
        res,start=processneedle(gunk,start,needle)
        if res is None: return
        yield res
|
|
||||||
|
|
||||||
def bandget(band):
|
def bandget(band):
|
||||||
peg=r.get(f'https://{band}.bandcamp.com/').content.decode('utf-8') # Just assume success,
|
peg=r.get(f'https://{band}.bandcamp.com/').content.decode('utf-8') # Just assume success,
|
||||||
|
@ -38,7 +11,7 @@ def bandget(band):
|
||||||
needles=[('href="/album/','"','urlid'),
|
needles=[('href="/album/','"','urlid'),
|
||||||
('img src="','"','coverurl'),
|
('img src="','"','coverurl'),
|
||||||
('class="title">\n ','\n','title')]
|
('class="title">\n ','\n','title')]
|
||||||
for needle in stringiter(peg,needles=needles):
|
for needle in u.stringiter(peg,needles=needles):
|
||||||
albums.append(needle)
|
albums.append(needle)
|
||||||
return albums
|
return albums
|
||||||
|
|
||||||
|
@ -51,7 +24,7 @@ def albumget(band,album,mode=0):
|
||||||
('span class="track-title">','</span>','title'),
|
('span class="track-title">','</span>','title'),
|
||||||
('<span class="time secondaryText">\n \n ','\n','duration')]
|
('<span class="time secondaryText">\n \n ','\n','duration')]
|
||||||
# Despite the lyrics being in the pages, it's not actually safe to get them with this system because any track without lyrics will quietly delete all tracks after it, up to and including the next one with lyrics.
|
# Despite the lyrics being in the pages, it's not actually safe to get them with this system because any track without lyrics will quietly delete all tracks after it, up to and including the next one with lyrics.
|
||||||
for needle in stringiter(peg,needles=needles):
|
for needle in u.stringiter(peg,needles=needles):
|
||||||
tracks.append(needle)
|
tracks.append(needle)
|
||||||
return tracks
|
return tracks
|
||||||
|
|
||||||
|
@ -65,8 +38,11 @@ def lyrics(song,band='',album=''):
|
||||||
if not (band):
|
if not (band):
|
||||||
return "Bandcamp does not currently support song search (due to being a webscraper). You must specify the band that performed the song."
|
return "Bandcamp does not currently support song search (due to being a webscraper). You must specify the band that performed the song."
|
||||||
band=band.replace(' ',''); song=song.replace(' ','-')
|
band=band.replace(' ',''); song=song.replace(' ','-')
|
||||||
peg=r.get(f'https://{band}.bandcamp.com/track/{song}').content.decode('utf-8')
|
peg=r.get(f'https://{band}.bandcamp.com/track/{song}')
|
||||||
|
if peg.status_code!=200: raise u.NotFound('song')
|
||||||
|
peg=peg.content.decode('utf-8')
|
||||||
needle=('<div class="tralbumData lyricsText">','</div>','lyrics')
|
needle=('<div class="tralbumData lyricsText">','</div>','lyrics')
|
||||||
lyrics=next(stringiter(peg,needle=needle))
|
try: lyrics=next(u.stringiter(peg,needle=needle))
|
||||||
|
except StopIteration: raise u.NotFound('lyrics')
|
||||||
lyrics=lyrics.replace('\r','').replace('\n','').replace('<br>','\n')
|
lyrics=lyrics.replace('\r','').replace('\n','').replace('<br>','\n')
|
||||||
return lyrics
|
return lyrics
|
||||||
|
|
Loading…
Reference in New Issue
Block a user