initcommit, I'll add .gitignore later

This commit is contained in:
Zergling_man 2021-11-17 22:44:26 +11:00
commit 0a11464ce6
6 changed files with 178 additions and 0 deletions

1
README.md Normal file
View File

@ -0,0 +1 @@
A lyrics-fetching tool written in Python, designed along the lines of a package manager. It manages indexing itself, so sources don't need to.

4
config.json.example Normal file
View File

@ -0,0 +1,4 @@
{
"lyrics":"/path/to/lyrics/files",
"indexcache":"~/.cache/known_songs"
}

62
getlyrics.py Normal file
View File

@ -0,0 +1,62 @@
import requests as r
import sys
import os
import json as j
# Work from the directory that holds this script (symlinks resolved) rather than a hard-coded
# home directory, so the relative paths below ('config.json', 'sources') resolve wherever the
# tool is installed and from wherever it is invoked.
os.chdir(os.path.dirname(os.path.realpath(__file__))) # Do this first so that the following work.
with open('config.json') as b: conf=j.load(b)
import importlib as il
# Import every sources/*.py file as a module of the `sources` package, then keep only the
# ones whose `enabled` flag is truthy.
sources=[il.import_module(f'sources.{n[:-3]}') for n in os.listdir('sources') if n.endswith('.py')]
sources=[s for s in sources if s.enabled]
class FlashyNotMountedError(Exception):
    """Raised by local() when the configured lyrics directory cannot be entered because it does not exist."""
def init(args):
    """Split command-line words into a (band, song) pair.

    The words before the first '-' item form the band, the words after it form the song.
    Each word is passed through str.capitalize() and the words are re-joined with single spaces.
    Raises ValueError (from list.index) when args contains no '-' item.
    """
    dash=args.index('-')
    titled=lambda words:' '.join(word.capitalize() for word in words)
    return titled(args[:dash]),titled(args[dash+1:])
def local(band,song):
    """Look the song up in the local lyrics directory.

    Changes the process working directory to conf['lyrics'], then reads
    '<song>_<band>.lyric' (both names lower-cased, spaces turned into underscores).
    Returns the file contents, or '' when that file does not exist.
    Raises FlashyNotMountedError when the lyrics directory itself does not exist.

    Design notes (plans, not implemented in this function):
    - All lyrics that are fetched from remote will automatically get saved in here for later reference.
    - Any time the existence of a song on a remote is proven (eg. the remote offers an album list
      for a band, which in turn offers a track list, and this needs to be parsed to get the correct
      URL), that will be indexed locally for future reference, in ./.indices
    - Deleting that folder will reset that, and should always be safe.
    - Indices will be one file per source, so deleting individual files will be part of
      uninstalling a source.
    """
    try:
        os.chdir(conf['lyrics'])
    except FileNotFoundError:
        # The lyrics directory is missing altogether.
        raise FlashyNotMountedError
    slug=lambda text:text.lower().replace(' ','_')
    try:
        with open(f'{slug(song)}_{slug(band)}.lyric') as handle:
            return handle.read()
    except FileNotFoundError:
        # Directory is there, but this song has not been saved locally.
        return ''
def remote(band,song):
    """Ask each enabled source in turn for the lyrics and stop at the first truthy answer.

    Sources are polled in the order of the module-level `sources` list, calling
    source.lyrics(song,band). Returns the first truthy result; otherwise whatever the last
    source returned, or '' when there are no sources at all. Exceptions raised by a source
    propagate to the caller unchanged (this should actually catch errors later).

    Intended design, not implemented yet:
    - Search all local indices first, and offer which repository to fetch from if multiple found.
    - Do *not* poll all remote repositories; if not found in local indices, poll sources in
      whatever order the underlying directory listing yields them, and stop as soon as an
      exact match is found.
    - If a partial match is found, offer to the user whether it's close enough.
    """
    if not sources:
        print('No sources configured! Failed to fetch lyrics from remote!')
    #TODO
    found=''
    for source in sources:
        found=source.lyrics(song,band)
        if found: break
    return found
if __name__=='__main__':
    # Usage: <band words> - <song words>
    band,song=init(sys.argv[1:])
    try:
        lyrics=local(band,song)
    except FlashyNotMountedError:
        # No local lyrics directory: nothing found locally, and the user decides whether to go on.
        lyrics=''
        if input("Flashy not mounted, proceed? (y/n)")!='y': exit()
    if not lyrics:
        # Fall back to the remote sources; normalise a falsy answer to ''.
        print("Lyrics not found locally, searching for remote...")
        lyrics=remote(band,song) or ''
    if lyrics: print(lyrics)

14
sources/README.md Normal file
View File

@ -0,0 +1,14 @@
Not actually markdown, haha tricked ya!
Documentation for files in this folder.
1) Every file must be a python script
2) It must be named with the domain it serves as the filename (starting from TLD and narrowing down; dots and slashes should be converted to underscores - the fetcher won't actually try to restore it and load the URL).
3) It must expose an "enabled" global boolean that, if False, will cause the fetcher to ignore its existence entirely. This is useful for sources that are temporarily not working, or for sources that are pending proper updates to the source's changes, etc.
4) It must expose a lyrics(song,band='',album='') function that returns song lyrics as plaintext (str), or as arranged lyrics (dict\[sections (list),arrangement (list)\]). It's recommended that if neither band nor album is given, the function should automatically return a failure, or explicitly recommend a search, rather than attempting to work with the song name alone.
5) It may expose other functions, such as:
- search(song,band='',album=''), which, as above, is recommended to automatically fail if only song is given
- index(band,album=''), which should return a list of the band's tracks (on that album), for populating the local index
- massfetch(band,album=''), which should fetch lyrics to all tracks of the band (on that album) and return them in a list
- This will, at some point, become a definitive list of optional functions that the fetcher supports; it will always accept recommendations for change.
6) Input formats: All names will be passed in exactly as the user presents the information; it is the source's job to wrangle them correctly into URLs. For example, "wither." is an album name that may be given to the source. A source should generally assume that words will be separated with spaces, but there's no guarantee. It may also occasionally get bands and songs passed in as each other; it isn't expected to figure that out.

72
sources/com_bandcamp.py Normal file
View File

@ -0,0 +1,72 @@
# Source on/off switch: the fetcher keeps only source modules whose `enabled` is truthy.
enabled=True
import requests as r
def processneedle(gunk,start,needle):
    """Extract the text between an opening and a closing delimiter.

    gunk:   the text to search.
    start:  position in gunk to begin searching from.
    needle: sequence whose first item is the opening delimiter and whose second item is the
            closing delimiter; any further items are ignored here.

    Returns (captured_text, next_position), where next_position is just past the closing
    delimiter, ready to be passed back in as `start`.
    Returns (None, None), after printing the failed needle, when either delimiter is missing.
    """
    opener,closer=needle[0],needle[1]
    # Search in place from an offset instead of slicing a copy of the remaining text each call.
    found=gunk.find(opener,start)
    if found<0:
        print('failed needle:',needle)
        return None,None
    begin=found+len(opener)
    # The closing delimiter is looked for one character past `begin`, so a capture is
    # never empty.
    stop=gunk.find(closer,begin+1)
    if stop<0:
        # Report a missing closing delimiter the same way as a missing opening one,
        # rather than letting a ValueError escape into the caller's generator.
        print('failed needle:',needle)
        return None,None
    return gunk[begin:stop],stop+len(closer)
def stringiter(gunk:str,needle:tuple=('',''),needles:list=[]):
    """Generator that repeatedly pulls delimited snippets out of gunk via processneedle().

    Non-interactive mode (needles given): each pass applies every needle in order, continuing
    from where the previous one stopped, and yields one dict per pass. A needle with a third
    item stores its capture under that item as key; captures of needles without one are
    collected in a list under ''. The generator ends as soon as any needle fails, and a
    partially filled dict is discarded rather than yielded.

    Interactive mode (no needles): yields each successive capture of `needle` until it fails.
    If needle is left at its ('','') default, the generator first yields None and takes the
    needle from the value sent into it.

    The `needles` default list is never mutated here.
    """
    pos=0
    if needles:
        # Non-interactive mode. You have all the info upfront.
        while True:
            record={}
            for spec in needles:
                found,pos=processneedle(gunk,pos,spec)
                if found is None and pos is None: return
                if len(spec)>2: record[spec[2]]=found
                else: record.setdefault('',[]).append(found)
            yield record
    # Interactive mode. I guess don't use this as much.
    if needle==('',''): needle=yield None
    while True:
        found,pos=processneedle(gunk,pos,needle)
        if found is None and pos is None: return
        yield found
def bandget(band):
    """Scrape a band's Bandcamp front page and return its albums.

    Returns a list of dicts with keys 'urlid', 'coverurl' and 'title', one per album that
    stringiter() could extract from the page.
    """
    # Just assume success, I'll put error handling in later
    # I'll also demand a real API later
    page=r.get(f'https://{band}.bandcamp.com/').content.decode('utf-8')
    fields=[('href="/album/','"','urlid'),
        ('img src="','"','coverurl'),
        ('class="title">\n ','\n','title')]
    return list(stringiter(page,needles=fields))
def albumget(band,album,mode=0):
    """Scrape an album page and return its track list.

    Returns a list of dicts with keys 'num', 'urlid', 'title' and 'duration'.
    `mode` is meant to select between the track list and all tracks' lyrics, but it is not
    consulted yet: the track list is always returned.
    """
    page=r.get(f'https://{band}.bandcamp.com/album/{album}').content.decode('utf-8')
    fields=[('rel="tracknum=','"','num'),
        ('a href="/track/','">','urlid'),
        ('span class="track-title">','</span>','title'),
        ('<span class="time secondaryText">\n \n ','\n','duration')]
    # Despite the lyrics being in the pages, it's not actually safe to get them with this system
    # because any track without lyrics will quietly delete all tracks after it, up to and
    # including the next one with lyrics.
    return list(stringiter(page,needles=fields))
def index(band,album=''):
    """Return track listings for populating the local index.

    With an album: that album's track list, as returned by albumget().
    Without one: a dict mapping each of the band's album url ids to that album's track list.
    """
    if album:
        return albumget(band,album,0)
    catalogue={}
    for entry in bandget(band):
        slug=entry['urlid']
        catalogue[slug]=albumget(band,slug,0)
    return catalogue
def lyrics(song,band='',album=''):
    """Fetch a track's lyrics from its Bandcamp track page.

    song:  song name; spaces are replaced with '-' to build the track URL.
    band:  band name; spaces are removed to build the subdomain. Required.
    album: accepted to match the source interface; not used.

    Returns the lyrics as plain text with '<br>' turned into newlines, an explanatory message
    when no band is given, or '' when the page contains no lyrics block.
    """
    if not band:
        return "Bandcamp does not currently support song search (due to being a webscraper). You must specify the band that performed the song."
    band=band.replace(' ',''); song=song.replace(' ','-')
    peg=r.get(f'https://{band}.bandcamp.com/track/{song}').content.decode('utf-8')
    needle=('<div class="tralbumData lyricsText">','</div>','lyrics')
    # Default to '' so a page without a lyrics block yields "no lyrics" instead of letting
    # StopIteration escape to the caller.
    found=next(stringiter(peg,needle=needle),'')
    # Raw line breaks in the markup are noise; the <br> tags carry the real line structure.
    return found.replace('\r','').replace('\n','').replace('<br>','\n')

View File

@ -0,0 +1,25 @@
# Permanently switched off; the note below explains why.
# NOTE(review): the functions in this file call `r` (requests), but no import of it is visible here - confirm before ever re-enabling.
enabled=False
"""
Lyrics fandom has been deleted with no notice and no reason given, even when asked directly.
It will never be reinstated, this file will never work again. It can be safely deleted.
It's still included as a reminder of one of the nice things we just can't have thanks to, probably, copyright.
"""
def lyrics(band,song):
    """Fetch lyrics from the lyrics.fandom.com wiki API.

    Requests the wikitext of the '<band>:<song>' page, follows '#redirect' pages to their
    '[[target]]', and returns the text between the <lyrics> and </lyrics> tags.
    Returns "Couldn't find it boi" when the first response lacks the expected keys.
    """
    #Remote
    api='https://lyrics.fandom.com/api.php'
    query={'format':'json', 'action':'parse', 'prop':'wikitext', 'page':f'{band}:{song}'}
    reply=r.get(api,params=query)
    try:
        text=reply.json()['parse']['wikitext']['*']
    except KeyError:
        return "Couldn't find it boi"
    #Follow redirects
    while '#redirect' in text.lower():
        query['page']=text[text.index('[[')+2:text.index(']]')]
        text=r.get(api,params=query).json()['parse']['wikitext']['*']
    opening='<lyrics>'
    return text[text.index(opening)+len(opening):text.index('</lyrics>')]
def url(band,song):
    """Return (wiki page URL, API URL) for the '<band>:<song>' page on lyrics.fandom.com.

    Values are inserted into the URLs as given, without any escaping.
    """
    page=f'{band}:{song}'
    fields=(('format','json'),('action','parse'),('prop','wikitext'),('page',page))
    query="&".join(f'{key}={value}' for key,value in fields)
    return f'https://lyrics.fandom.com/wiki/{page}',f'https://lyrics.fandom.com/api.php?{query}'