initcommit, I'll add .gitignore later

2021-11-17 22:44:26 +11:00
commit 0a11464ce6
6 changed files with 178 additions and 0 deletions
--- a/README.md
+++ b/README.md
@@ -0,0 +1 @@
+A lyrics-fetching tool, written in Python. It is designed along the lines of a package manager: the fetcher manages indexing itself, so sources don't need to do that.
--- a/config.json.example
+++ b/config.json.example
@@ -0,0 +1,4 @@
+{
+"lyrics":"/path/to/lyrics/files",
+"indexcache":"~/.cache/known_songs"
+}
--- a/getlyrics.py
+++ b/getlyrics.py
@@ -0,0 +1,62 @@
+import requests as r
+import sys
+import os
+import json as j
+
+# Work from the directory that contains this script, so that config.json and
+# the sources/ folder are found wherever the checkout happens to live.
+# Do this first so that the following work.
+os.chdir(os.path.dirname(os.path.abspath(__file__)))
+with open('config.json') as b: conf=j.load(b)
+import importlib as il
+# Import every sources/*.py file as a module, then keep only the ones whose
+# `enabled` global is truthy.
+sources=[il.import_module(f'sources.{n[:-3]}') for n in os.listdir('sources') if n.endswith('.py')]
+sources=list(filter(lambda x:x.enabled,sources))
+class FlashyNotMountedError(Exception):
+	"""Raised by local() when the configured lyrics directory cannot be entered."""
+
+def init(args):
+	"""Split command-line words into a (band, song) pair.
+
+	args is a list of words of the form `<band words> - <song words>`; the
+	first element that is exactly '-' is the separator. Every word is passed
+	through str.capitalize() and the words on each side are joined with
+	single spaces.
+
+	Raises ValueError if args contains no standalone '-' element.
+	"""
+	try:
+		split=args.index('-')
+	except ValueError:
+		# list.index's own message ("'-' is not in list") does not tell the
+		# user what was expected, so replace it with a usage hint.
+		raise ValueError("expected arguments of the form: <band> - <song>") from None
+	band=' '.join(word.capitalize() for word in args[:split])
+	song=' '.join(word.capitalize() for word in args[split+1:])
+	return band,song
+
+def local(band,song):
+	"""Return the locally stored lyrics for a song, or '' if none are stored.
+
+	Changes the working directory to conf['lyrics'] and reads
+	`<song>_<band>.lyric` from it, where both names are lower-cased and have
+	spaces replaced by underscores.
+
+	Raises FlashyNotMountedError if conf['lyrics'] does not exist.
+
+	Design notes carried over from the original comments (none of this is
+	done by this function itself):
+	- All lyrics fetched from a remote are meant to be saved here for later reference.
+	- Whenever the existence of a song on a remote is proven (e.g. while
+	  parsing a band's album list and track list to find the right URL),
+	  that is meant to be indexed locally in ./.indices.
+	- Deleting that folder resets the index and should always be safe.
+	- Indices are one file per source, so deleting an individual file is
+	  part of uninstalling a source.
+	"""
+	try:
+		os.chdir(conf['lyrics'])
+	except FileNotFoundError:
+		raise FlashyNotMountedError
+	def slug(text):
+		return text.lower().replace(' ','_')
+	filename=f'{slug(song)}_{slug(band)}.lyric'
+	try:
+		with open(filename) as handle:
+			return handle.read()
+	except FileNotFoundError:
+		# No saved copy of this song.
+		return ''
+
+def remote(band,song):
+	"""Ask each enabled source in turn for the lyrics and return the first truthy answer.
+
+	Sources are tried in the order of the module-level `sources` list; each
+	is called as source.lyrics(song,band). If no source yields a truthy
+	result, the last result obtained (or '' when there are no sources) is
+	returned. Exceptions raised by a source propagate to the caller.
+
+	Planned behaviour from the original notes (still TODO, not implemented here):
+	- Search all local indices first, and offer a choice of repository if several match.
+	- Do *not* poll every remote repository: if the song is not in the local
+	  indices, poll sources in whatever order they were loaded and stop at
+	  the first exact match.
+	- If only a partial match is found, ask the user whether it is close enough.
+	"""
+	if not sources:
+		print('No sources configured! Failed to fetch lyrics from remote!')
+	#TODO
+	found=''
+	for module in sources:
+		# Errors from a source are not handled yet; they are meant to be caught here later.
+		found=module.lyrics(song,band)
+		if found:
+			return found
+	return found
+
+if __name__=='__main__':
+	# Command-line entry point: try the local store first, then the remote sources.
+	band,song=init(sys.argv[1:])
+	try:
+		lyrics=local(band,song)
+	except FlashyNotMountedError:
+		# The lyrics directory is missing; let the user decide whether to go remote-only.
+		answer=input("Flashy not mounted, proceed? (y/n)")
+		if answer!='y':
+			exit()
+		lyrics=''
+	if not lyrics:
+		print("Lyrics not found locally, searching for remote...")
+		lyrics=remote(band,song) or ''
+	if lyrics:
+		print(lyrics)
--- a/sources/README.md
+++ b/sources/README.md
@@ -0,0 +1,14 @@
+Not actually markdown, haha tricked ya!
+
+Documentation for files in this folder.
+
+1) Every file must be a python script
+2) It must be named with the domain it serves as the filename (starting from TLD and narrowing down; dots and slashes should be converted to underscores - the fetcher won't actually try to restore it and load the URL).
+3) It must expose an "enabled" global boolean that, if False, will cause the fetcher to ignore its existence entirely. This is useful for sources that are temporarily not working, or for sources that are pending proper updates to the source's changes, etc.
+4) It must expose a lyrics(song,band='',album='') function that returns song lyrics as plaintext (str), or as arranged lyrics (dict\[sections (list),arrangement (list)\]). It's recommended that if neither band nor album is given, the function should automatically return a failure, or explicitly recommend a search, rather than attempting to work with the song name alone.
+5) It may expose other functions, such as:
+- search(song,band='',album=''), which, as above, is recommended to automatically fail if only song is given
+- index(band,album=''), which should return a list of the band's tracks (on that album), for populating the local index
+- massfetch(band,album=''), which should fetch lyrics to all tracks of the band (on that album) and return them in a list
+- This will, at some point, become a definitive list of optional functions that the fetcher supports; it will always accept recommendations for change.
+6) Input formats: All names will be passed in exactly as the user presents the information, it is the source's job to wrangle it correctly into URLs. For example, "wither." is an album name that may be given to the source. It should generally assume that words will be separated with spaces, but there's no guarantee. Also it may occasionally get bands and songs passed in as each other. It isn't expected to figure that out.
--- a/sources/com_bandcamp.py
+++ b/sources/com_bandcamp.py
@@ -0,0 +1,72 @@
+enabled=True
+
+import requests as r
+
+def processneedle(gunk,start,needle):
+	"""Extract the text between an opening and a closing marker.
+
+	gunk is the text to search, start the offset to search from, and needle
+	a sequence whose first two items are the opening and closing markers.
+
+	Returns (text, next_offset), where text is what lies between the markers
+	and next_offset points just past the closing marker. Returns (None, None)
+	and prints 'failed needle:' if either marker cannot be found.
+	"""
+	opener,closer=needle[0],needle[1]
+	found=gunk.find(opener,start)
+	if found==-1:
+		print('failed needle:',needle)
+		return None,None
+	begin=found+len(opener)
+	# The closing marker is looked for from begin+1, so a match always holds
+	# at least one character (kept from the original behaviour).
+	stop=gunk.find(closer,begin+1)
+	if stop==-1:
+		# Previously this case raised an uncaught ValueError; report it the
+		# same way as a missing opening marker.
+		print('failed needle:',needle)
+		return None,None
+	return gunk[begin:stop],stop+len(closer)
+
+def stringiter(gunk:str,needle:tuple=('',''),needles:list=None):
+	"""Generator that repeatedly pulls marker-delimited text out of gunk.
+
+	Non-interactive mode (needles given and non-empty): each round applies
+	every needle in order, continuing from where the previous one stopped,
+	and yields one dict per round. A needle with a third item stores its
+	match under that key; matches of needles without one are collected in a
+	list under ''. The generator ends as soon as any needle fails, and the
+	partially filled dict of that round is not yielded.
+
+	Interactive mode (no needles): yields each successive match of `needle`
+	until it fails. If needle is left at its default ('',''), the generator
+	first yields None and takes the needle from the value passed to send().
+	"""
+	start=0
+	if needles:
+		# Non-interactive mode. You have all the info upfront.
+		while True:
+			out={}
+			for item in needles:
+				res,start=processneedle(gunk,start,item)
+				if res is None and start is None: return
+				if len(item)>2: out[item[2]]=res
+				else: out.setdefault('',[]).append(res)
+			yield out
+	# Interactive mode. I guess don't use this as much.
+	if needle==('',''): needle=yield None
+	while True:
+		res,start=processneedle(gunk,start,needle)
+		if res is None and start is None: return
+		yield res
+
+def bandget(band):
+	"""Scrape a band's Bandcamp front page and return its albums.
+
+	Returns a list of dicts with the keys 'urlid', 'coverurl' and 'title',
+	one per album entry matched on the page.
+	"""
+	# Just assume success, error handling comes later (as does demanding a real API).
+	response=r.get(f'https://{band}.bandcamp.com/')
+	peg=response.content.decode('utf-8')
+	needles=[
+		('href="/album/','"','urlid'),
+		('img src="','"','coverurl'),
+		('class="title">\n            ','\n','title'),
+	]
+	return list(stringiter(peg,needles=needles))
+
+def albumget(band,album,mode=0):
+	"""Scrape an album page and return its track list.
+
+	Returns a list of dicts with the keys 'num', 'urlid', 'title' and
+	'duration', one per track matched on the page. mode is accepted but not
+	used yet; it is meant to select between the track list and all tracks'
+	lyrics.
+	"""
+	response=r.get(f'https://{band}.bandcamp.com/album/{album}')
+	peg=response.content.decode('utf-8')
+	# Despite the lyrics being in the pages, it's not actually safe to get them
+	# with this system: any track without lyrics would quietly delete all tracks
+	# after it, up to and including the next one with lyrics.
+	needles=[
+		('rel="tracknum=','"','num'),
+		('a href="/track/','">','urlid'),
+		('span class="track-title">','</span>','title'),
+		('<span class="time secondaryText">\n        \n            ','\n','duration'),
+	]
+	return list(stringiter(peg,needles=needles))
+
+def index(band,album=''):
+	"""Return the track list of one album, or of every album of the band.
+
+	With album given, returns albumget's track list for it. Without, returns
+	a dict mapping each album's 'urlid' (from bandget) to its track list.
+	"""
+	if not album:
+		return {entry['urlid']:albumget(band,entry['urlid'],0) for entry in bandget(band)}
+	return albumget(band,album,0)
+
+def lyrics(song,band='',album=''):
+	"""Fetch a track's lyrics from the band's Bandcamp track page.
+
+	Spaces are removed from band and replaced by '-' in song to build the
+	URL. album is accepted but not used. Returns the lyrics as plain text
+	with '<br>' turned into newlines, '' if the page has no lyrics block, or
+	an explanatory message if no band was given.
+	"""
+	if not band:
+		return "Bandcamp does not currently support song search (due to being a webscraper). You must specify the band that performed the song."
+	band=band.replace(' ',''); song=song.replace(' ','-')
+	peg=r.get(f'https://{band}.bandcamp.com/track/{song}').content.decode('utf-8')
+	needle=('<div class="tralbumData lyricsText">','</div>','lyrics')
+	# If the lyrics block is missing the generator finishes without yielding;
+	# a bare next() would then leak StopIteration to the caller, so ask for a
+	# default and report "nothing found" with a falsy result instead.
+	found=next(stringiter(peg,needle=needle),None)
+	if found is None:
+		return ''
+	# Drop the page's own line breaks; only <br> marks a real line break.
+	return found.replace('\r','').replace('\n','').replace('<br>','\n')
--- a/sources/com_fandom_lyrics.py
+++ b/sources/com_fandom_lyrics.py
@@ -0,0 +1,25 @@
+enabled=False
+"""
+Lyrics fandom has been deleted with no notice and no reason given, even when asked directly.
+It will never be reinstated, this file will never work again. It can be safely deleted.
+It's still included as a reminder of one of the nice things we just can't have thanks to, probably, copyright.
+"""
+
+def lyrics(band,song):
+	"""Fetch lyrics from the lyrics.fandom.com wiki API.
+
+	Requests the wikitext of the page '<band>:<song>', follows '#redirect'
+	pages, and returns the text between the <lyrics> and </lyrics> tags.
+	Returns "Couldn't find it boi" if the first response has no parsed
+	wikitext.
+
+	NOTE(review): the parameters are (band, song), while getlyrics.remote()
+	calls source.lyrics(song,band) positionally - confirm the intended
+	order before re-enabling this source.
+	"""
+	# This module never imported requests at file level, so `r` was undefined here.
+	import requests as r
+	#Remote
+	p={'format':'json', 'action':'parse', 'prop':'wikitext', 'page':f'{band}:{song}'}
+	lyrics=r.get('https://lyrics.fandom.com/api.php',params=p)
+	try:
+		lyrics=lyrics.json()['parse']['wikitext']['*']
+	except KeyError: return "Couldn't find it boi"
+	#Follow redirects
+	while '#redirect' in lyrics.lower():
+		# The redirect target is the first [[...]] link in the wikitext.
+		p['page']=lyrics[lyrics.index('[[')+2:lyrics.index(']]')]
+		lyrics=r.get('https://lyrics.fandom.com/api.php',params=p).json()['parse']['wikitext']['*']
+	out=lyrics[lyrics.index('<lyrics>')+8:lyrics.index('</lyrics>')]
+	return out
+
+def url(band,song):
+	"""Return the (wiki page URL, API URL) pair for a band/song on lyrics.fandom.com.
+
+	The values are inserted into the URLs as given, without URL-encoding.
+	"""
+	page=f'{band}:{song}'
+	query={'format':'json', 'action':'parse', 'prop':'wikitext', 'page':page}
+	querystring="&".join(f'{key}={value}' for key,value in query.items())
+	wiki=f'https://lyrics.fandom.com/wiki/{page}'
+	api=f'https://lyrics.fandom.com/api.php?{querystring}'
+	return wiki,api
				`@@ -0,0 +1 @@`
				`A lyrics fetching tool, written in python. Similarly designed to a package manager. Manages indexing itself, sources don't need to do that.`