initcommit, I'll add .gitignore later

2021-11-17 22:44:26 +11:00
commit 0a11464ce6
6 changed files with 178 additions and 0 deletions
--- a/README.md
+++ b/README.md
@@ -0,0 +1 @@
 A lyrics-fetching tool written in Python, designed along the lines of a package manager. It manages indexing itself; sources don't need to do that.
--- a/config.json.example
+++ b/config.json.example
@@ -0,0 +1,4 @@
 {
 "lyrics":"/path/to/lyrics/files",
 "indexcache":"~/.cache/known_songs"
 }
--- a/getlyrics.py
+++ b/getlyrics.py
@@ -0,0 +1,62 @@
 import requests as r
 import sys
 import os
 import json as j
 # NOTE(review): hard-coded absolute path. The relative 'config.json' and 'sources' lookups below only work after this chdir.
 os.chdir('/home/wisknort/lyricise')
 with open('config.json') as b:
 	conf=j.load(b)
 import importlib as il
 # Import every sources/*.py file as a module first, then keep only the modules whose `enabled` flag is truthy.
 sources=[]
 for n in os.listdir('sources'):
 	if n.endswith('.py'): sources.append(il.import_module(f'sources.{n[:-3]}'))
 sources=[s for s in sources if s.enabled]
 class FlashyNotMountedError(Exception):
 	# Raised by local() when the configured lyrics directory cannot be entered because it does not exist.
 	pass
 def init(args):
 	#Init
 	# Splits the command-line words into a (band, song) pair.
 	# args is a list of words shaped like [band words..., '-', song words...]; the first standalone '-' is the separator.
 	# Every word is passed through str.capitalize() and the words on each side are joined with single spaces.
 	# Raises ValueError (with a usage hint) when no '-' separator is present.
 	try: split=args.index('-')
 	except ValueError:
 		raise ValueError("expected arguments of the form: BAND - SONG") from None
 	band=' '.join(n.capitalize() for n in args[:split])
 	song=' '.join(n.capitalize() for n in args[split+1:])
 	return band,song
 def local(band,song):
 	#Local
 	# Looks the song up in the configured lyrics directory (conf['lyrics']) and returns the stored text, or '' when no file exists for it.
 	# Side effect: the process working directory is changed to the lyrics directory.
 	# Raises FlashyNotMountedError when that directory does not exist.
 	#Design notes kept from the original (none of this is done by this function yet):
 	#All lyrics that are fetched from remote will automatically get saved in here for later reference
 	#Also, any time the existence of a song on a remote is proven (eg. the remote offers an album list for a band, which in turn offers a track list, and this needs to be parsed to get the correct URL), that will be indexed locally for future reference, in ./.indices
 	#Deleting that folder will reset that, and should always be safe.
 	#Indices will be one file per source, so deleting individual files will be part of uninstalling a source.
 	try: os.chdir(conf['lyrics'])
 	except FileNotFoundError: raise FlashyNotMountedError
 	def slug(text): return text.lower().replace(' ','_')
 	# File names look like "<song>_<band>.lyric", lower-cased with underscores for spaces.
 	path=f'{slug(song)}_{slug(band)}.lyric'
 	try:
 		with open(path) as handle: return handle.read()
 	except FileNotFoundError:
 		return ''
 def remote(band,song):
 	# Asks each enabled source in turn for the lyrics and returns the first non-empty result, or '' when nothing was found.
 	# A source that raises is reported on stderr and skipped, so one broken source cannot abort the whole search.
 	#Planned behaviour (not implemented yet):
 	#Will search all local indices first, and offer which repository to fetch from if multiple found
 	#But will *not* poll all remote repositories; if not found in local indices, will begin polling sources in whatever order they were loaded, and will stop as soon as an exact match is found.
 	#If a partial match is found, will offer to user whether it's close enough.
 	if not sources:
 		print('No sources configured! Failed to fetch lyrics from remote!')
 		return ''
 	#TODO
 	for source in sources:
 		try: lyrics=source.lyrics(song,band)
 		except Exception as err:
 			# Scrapers fail in many ways (network errors, changed page layout); move on to the next source.
 			print(f"Source {getattr(source,'__name__',source)} failed: {err!r}",file=sys.stderr)
 			continue
 		if lyrics: return lyrics
 	return ''
 if __name__=='__main__':
 	# Command line: BAND WORDS - SONG WORDS. Try the local lyrics directory first, then the remote sources.
 	band,song=init(sys.argv[1:])
 	lyrics=''
 	try:
 		lyrics=local(band,song)
 	except FlashyNotMountedError:
 		# The lyrics directory is missing; only continue to the remote lookup if the user explicitly agrees.
 		answer=input("Flashy not mounted, proceed? (y/n)")
 		if answer!='y': exit()
 	if not lyrics:
 		print("Lyrics not found locally, searching for remote...")
 		lyrics=remote(band,song) or ''
 	if lyrics:
 		print(lyrics)
--- a/sources/README.md
+++ b/sources/README.md
@@ -0,0 +1,14 @@
 Not actually markdown, haha tricked ya!
 Documentation for files in this folder.
 1) Every file must be a python script
 2) It must be named with the domain it serves as the filename (starting from TLD and narrowing down; dots and slashes should be converted to underscores - the fetcher won't actually try to restore it and load the URL).
 3) It must expose an "enabled" global boolean that, if False, will cause the fetcher to ignore its existence entirely. This is useful for sources that are temporarily not working, or for sources that are pending proper updates to the source's changes, etc.
 4) It must expose a lyrics(song,band='',album='') function that returns song lyrics as plaintext (str), or as arranged lyrics (dict\[sections (list),arrangement (list)\]). It's recommended that if neither band nor album is given, the function should automatically return a failure, or explicitly recommend a search, rather than attempting to work with the song title alone.
 5) It may expose other functions, such as:
 - search(song,band='',album=''), which, as above, is recommended to automatically fail if only song is given
 - index(band,album=''), which should return a list of the band's tracks (on that album), for populating the local index
 - massfetch(band,album=''), which should fetch lyrics to all tracks of the band (on that album) and return them in a list
 - This will, at some point, become a definitive list of optional functions that the fetcher supports; it will always accept recommendations for change.
 6) Input formats: All names will be passed in exactly as the user presents the information; it is the source's job to wrangle them correctly into URLs. For example, "wither." is an album name that may be given to the source. A source should generally assume that words will be separated with spaces, but there's no guarantee. It may also occasionally get bands and songs passed in as each other; it isn't expected to figure that out.
--- a/sources/com_bandcamp.py
+++ b/sources/com_bandcamp.py
@@ -0,0 +1,72 @@
 # Read by the fetcher when it loads the sources folder: modules whose `enabled` is falsy are filtered out.
 enabled=True
 import requests as r
 def processneedle(gunk,start,needle):
 	# Finds the next span of gunk, at or after index start, that sits between needle[0] (opening marker) and needle[1] (closing marker).
 	# Returns (text between the markers, index just past the closing marker).
 	# Returns (None,None), after printing a notice, when either marker cannot be found.
 	# str.find with an offset is used so the haystack is not copied by slicing on every call.
 	opened=gunk.find(needle[0],start)
 	if opened<0: print('failed needle:',needle); return None,None
 	begin=opened+len(needle[0])
 	# The closing marker is searched from begin+1, so a match always contains at least one character (behaviour kept from the original).
 	closed=gunk.find(needle[1],begin+1)
 	if closed<0: print('failed needle:',needle); return None,None
 	return gunk[begin:closed],closed+len(needle[1])
 def stringiter(gunk:str,needle:tuple=('',''),needles:list=None):
 	# Generator that walks through gunk from the beginning, extracting marker-delimited spans with processneedle().
 	# needles given: each item is (opening, closing[, key]). One dict is yielded per full pass over the list;
 	#   keyed items are stored under their key, unkeyed items are collected in a list under ''.
 	#   The generator ends as soon as any marker pair can no longer be found (a partly filled dict is discarded).
 	# needles empty/None: the single needle is applied repeatedly and each matched string is yielded.
 	#   If needle is left at its ('','') default, None is yielded first and the needle is taken from the value sent back in.
 	start=0
 	if needles:
 		# Non-interactive mode. You have all the info upfront.
 		while True:
 			out={}
 			for spec in needles:
 				res,start=processneedle(gunk,start,spec)
 				if res is None and start is None: return
 				if len(spec)>2: out[spec[2]]=res
 				else: out.setdefault('',[]).append(res)
 			yield out
 	# Interactive mode. I guess don't use this as much.
 	if needle==('',''): needle=yield None
 	while True:
 		res,start=processneedle(gunk,start,needle)
 		if res is None and start is None: return
 		yield res
 def bandget(band):
 	# Downloads the band's Bandcamp front page and scrapes it into a list of album dicts with the keys 'urlid', 'coverurl' and 'title'.
 	# Just assume success,
 	# I'll put error handling in later
 	# I'll also demand a real API later
 	page=r.get(f'https://{band}.bandcamp.com/').content.decode('utf-8')
 	markers=[
 		('href="/album/','"','urlid'),
 		('img src="','"','coverurl'),
 		('class="title">\n            ','\n','title'),
 	]
 	return list(stringiter(page,needles=markers))
 def albumget(band,album,mode=0):
 	# Downloads one album page and scrapes it into a list of track dicts with the keys 'num', 'urlid', 'title' and 'duration'.
 	# mode is meant to select between the track list and all tracks' lyrics, but the current body never reads it: the track list is always returned.
 	page=r.get(f'https://{band}.bandcamp.com/album/{album}').content.decode('utf-8')
 	# Despite the lyrics being in the pages, it's not actually safe to get them with this system because any track without lyrics will quietly delete all tracks after it, up to and including the next one with lyrics.
 	markers=[
 		('rel="tracknum=','"','num'),
 		('a href="/track/','">','urlid'),
 		('span class="track-title">','</span>','title'),
 		('<span class="time secondaryText">\n        \n            ','\n','duration'),
 	]
 	return list(stringiter(page,needles=markers))
 def index(band,album=''):
 	# With an album given: returns that album's track list.
 	# Without one: returns a dict mapping each album urlid found on the band page to that album's track list.
 	if not album:
 		listing={}
 		for entry in bandget(band):
 			urlid=entry['urlid']
 			listing[urlid]=albumget(band,urlid,0)
 		return listing
 	return albumget(band,album,0)
 def lyrics(song,band='',album=''):
 	# Fetches the track page https://<band>.bandcamp.com/track/<song> and returns the text of its lyrics block, with <br> tags turned into newlines.
 	# Spaces are removed from band and replaced with '-' in song to build the URL.
 	# Returns an explanatory message when no band is given, and '' when the page has no lyrics block.
 	# album is accepted for the common source signature but is not used.
 	if not (band):
 		return "Bandcamp does not currently support song search (due to being a webscraper). You must specify the band that performed the song."
 	band=band.replace(' ',''); song=song.replace(' ','-')
 	peg=r.get(f'https://{band}.bandcamp.com/track/{song}').content.decode('utf-8')
 	needle=('<div class="tralbumData lyricsText">','</div>','lyrics')
 	# next() gets a default so a page without lyrics gives '' instead of leaking StopIteration to the caller.
 	found=next(stringiter(peg,needle=needle),None)
 	if found is None: return ''
 	return found.replace('\r','').replace('\n','').replace('<br>','\n')
--- a/sources/com_fandom_lyrics.py
+++ b/sources/com_fandom_lyrics.py
@@ -0,0 +1,25 @@
 # Falsy, so the fetcher filters this module out when it loads the sources folder. The string below explains why.
 enabled=False
 """
 Lyrics fandom has been deleted with no notice and no reason given, even when asked directly.
 It will never be reinstated, this file will never work again. It can be safely deleted.
 It's still included as a reminder of one of the nice things we just can't have thanks to, probably, copyright.
 """
 def lyrics(song,band='',album=''):
 	#Remote
 	# Requests the wikitext of the "<band>:<song>" page from the lyrics.fandom.com API, follows #redirect pages, and returns the text between the <lyrics> tags.
 	# Parameter order follows the source contract lyrics(song,band='',album=''), which is how the fetcher calls it (song first); album is accepted but unused.
 	# Returns "Couldn't find it boi" when the first API response has no parsed page.
 	import requests as r # this module has no top-level import of requests
 	p={'format':'json', 'action':'parse', 'prop':'wikitext', 'page':f'{band}:{song}'}
 	reply=r.get('https://lyrics.fandom.com/api.php',params=p)
 	try:
 		text=reply.json()['parse']['wikitext']['*']
 	except KeyError: return "Couldn't find it boi"
 	#Follow redirects
 	while '#redirect' in text.lower():
 		# The redirect target is the first [[...]] link in the wikitext.
 		p['page']=text[text.index('[[')+2:text.index(']]')]
 		text=r.get('https://lyrics.fandom.com/api.php',params=p).json()['parse']['wikitext']['*']
 	# 8 == len('<lyrics>')
 	return text[text.index('<lyrics>')+8:text.index('</lyrics>')]
 def url(band,song):
 	# Returns a pair of URLs for the "<band>:<song>" page: (wiki page URL, API URL that requests its wikitext as JSON).
 	# Values are inserted as-is, without URL-encoding.
 	page=f'{band}:{song}'
 	fields=(('format','json'),('action','parse'),('prop','wikitext'),('page',page))
 	query="&".join(k+'='+v for k,v in fields)
 	wiki=f'https://lyrics.fandom.com/wiki/{band}:{song}'
 	api=f'https://lyrics.fandom.com/api.php?{query}'
 	return wiki,api
		`@@ -0,0 +1 @@`
							`A lyrics fetching tool, written in python. Similarly designed to a package manager. Manages indexing itself, sources don't need to do that.`