initcommit, I'll add .gitignore later

This commit is contained in:
2021-11-17 22:44:26 +11:00
commit 0a11464ce6
6 changed files with 178 additions and 0 deletions

14
sources/README.md Normal file
View File

@@ -0,0 +1,14 @@
Not actually markdown, haha tricked ya!
Documentation for files in this folder.
1) Every file must be a python script
2) Its filename must be the domain it serves, starting from the TLD and narrowing down, with dots and slashes converted to underscores (e.g. com_bandcamp.py for bandcamp.com). The fetcher won't actually try to restore the domain from the filename and load the URL.
3) It must expose an "enabled" global boolean that, if False, will cause the fetcher to ignore its existence entirely. This is useful for sources that are temporarily not working, or for sources that are pending proper updates to the source's changes, etc.
4) It must expose a lyrics(song,band='',album='') function that returns song lyrics as plaintext (str), or as arranged lyrics (dict[sections (list), arrangement (list)]). It's recommended that if neither band nor album is given, the function should automatically return a failure, or explicitly recommend a search, rather than attempting to work with the song name alone.
5) It may expose other functions, such as:
- search(song,band='',album=''), which, as above, is recommended to automatically fail if only song is given
- index(band,album=''), which should return a list of the band's tracks (on that album), for populating the local index
- massfetch(band,album=''), which should fetch lyrics to all tracks of the band (on that album) and return them in a list
- This will, at some point, become a definitive list of optional functions that the fetcher supports; it will always accept recommendations for change.
6) Input formats: All names will be passed in exactly as the user presents the information; it is the source's job to wrangle them correctly into URLs. For example, "wither." is an album name that may be given to the source. A source should generally assume that words will be separated with spaces, but there's no guarantee. It may also occasionally get bands and songs passed in as each other; it isn't expected to figure that out.

72
sources/com_bandcamp.py Normal file
View File

@@ -0,0 +1,72 @@
# Source switch read by the fetcher: a source whose `enabled` is False is ignored.
enabled=True
import requests as r
def processneedle(gunk, start, needle):
    """Extract the text between the next opener/closer pair in ``gunk``.

    Searching begins at offset ``start``. ``needle`` is a sequence whose first
    two items are the opening and closing delimiter strings; any further items
    (such as a result label) are ignored here.

    Returns a ``(text, next_start)`` tuple, where ``text`` is the substring
    found between the delimiters and ``next_start`` is the offset just past
    the closing delimiter, ready to be passed back in as ``start``.

    If either delimiter cannot be found, a diagnostic line is printed and
    ``(None, None)`` is returned.
    """
    opener, closer = needle[0], needle[1]
    try:
        # Search in place rather than slicing, so the remaining text is not
        # copied on every call.
        start = gunk.index(opener, start) + len(opener)
        # The closer is looked for one character past the opener, so every
        # match holds at least one character and the scan always advances.
        stop = gunk.index(closer, start + 1)
    except ValueError:
        print('failed needle:', needle)
        return None, None
    return gunk[start:stop], stop + len(closer)
def stringiter(gunk: str, needle: tuple = ('', ''), needles: list = None):
    """Generate successive delimiter-bounded extracts from ``gunk``.

    The generator works in one of two modes:

    * Non-interactive (``needles`` is a non-empty list). Every needle in the
      list is applied in order, each one continuing from where the previous
      one stopped, and the results are collected into one dict that is then
      yielded. A needle with a third item stores its result under that item
      as the key; needles without one have their results appended to a list
      stored under the ``''`` key. The cycle repeats until any needle fails
      to match, at which point the generator ends and the partially filled
      dict of that cycle is discarded.
    * Interactive (``needles`` empty or omitted). The single ``needle`` is
      applied repeatedly and each extracted string is yielded. When
      ``needle`` is left at its ``('', '')`` default, the generator first
      yields ``None`` and expects the real needle to be supplied through
      ``send()``.
    """
    start = 0
    if needles:
        # Non-interactive mode: all the info is available upfront.
        while True:
            out = {}
            for spec in needles:
                res, start = processneedle(gunk, start, spec)
                if res is None:
                    return
                if len(spec) > 2:
                    out[spec[2]] = res
                else:
                    out.setdefault('', []).append(res)
            yield out
    # Interactive mode.
    if needle == ('', ''):
        needle = yield None
    while True:
        res, start = processneedle(gunk, start, needle)
        if res is None:
            return
        yield res
def bandget(band):
    """Scrape a band's Bandcamp front page and return the albums found on it.

    ``band`` is inserted verbatim as the subdomain of ``bandcamp.com``.

    Returns a list of dicts, one per album, each holding the keys
    ``'urlid'``, ``'coverurl'`` and ``'title'``.

    The HTTP request is assumed to succeed; there is no error handling yet,
    and a real API is still on the wish list.
    """
    response = r.get(f'https://{band}.bandcamp.com/')
    page = response.content.decode('utf-8')
    # Opening delimiter, closing delimiter, and the key the text is stored under.
    album_fields = [
        ('href="/album/', '"', 'urlid'),
        ('img src="', '"', 'coverurl'),
        ('class="title">\n ', '\n', 'title'),
    ]
    return list(stringiter(page, needles=album_fields))
def albumget(band, album, mode=0):
    """Scrape a Bandcamp album page and return its track list.

    ``band`` is used verbatim as the subdomain and ``album`` as the URL id
    under ``/album/``.

    Returns a list of dicts, one per track, each holding the keys ``'num'``,
    ``'urlid'``, ``'title'`` and ``'duration'``.

    ``mode`` is meant to select between a track list and all tracks' lyrics,
    but it is not read yet: only the track list is produced.
    """
    response = r.get(f'https://{band}.bandcamp.com/album/{album}')
    page = response.content.decode('utf-8')
    # Opening delimiter, closing delimiter, and the key the text is stored under.
    track_fields = [
        ('rel="tracknum=', '"', 'num'),
        ('a href="/track/', '">', 'urlid'),
        ('span class="track-title">', '</span>', 'title'),
        ('<span class="time secondaryText">\n \n ', '\n', 'duration'),
    ]
    # The lyrics are in the page too, but fetching them with this system is
    # not safe: a track without lyrics would quietly swallow every track
    # after it, up to and including the next one that does have lyrics.
    return list(stringiter(page, needles=track_fields))
def index(band, album=''):
    """Return the band's tracks, for one album or for all of them.

    With ``album`` given, returns that album's track list as produced by
    ``albumget``. Without it, every album found by ``bandget`` is fetched and
    the result is a dict mapping each album's URL id to its track list.
    """
    if not album:
        return {
            entry['urlid']: albumget(band, entry['urlid'], 0)
            for entry in bandget(band)
        }
    return albumget(band, album, 0)
def lyrics(song, band='', album=''):
    """Fetch a song's lyrics from the band's Bandcamp track page.

    ``band`` has its spaces removed and ``song`` has its spaces replaced by
    dashes to build the track URL. ``album`` is accepted for interface
    compatibility but is not used.

    Returns the lyrics as plain text with one line per ``<br>`` in the page.
    If ``band`` is empty, or the page contains no lyrics block, an
    explanatory message is returned instead.
    """
    if not band:
        return "Bandcamp does not currently support song search (due to being a webscraper). You must specify the band that performed the song."
    band = band.replace(' ', '')
    song = song.replace(' ', '-')
    peg = r.get(f'https://{band}.bandcamp.com/track/{song}').content.decode('utf-8')
    needle = ('<div class="tralbumData lyricsText">', '</div>', 'lyrics')
    try:
        text = next(stringiter(peg, needle=needle))
    except (StopIteration, ValueError):
        # No lyrics block on the page (or it is not terminated). Report it
        # instead of leaking the iterator's exception to the caller.
        return "Bandcamp did not return any lyrics for that track."
    # Raw line breaks in the markup are noise; <br> marks the real ones.
    return text.replace('\r', '').replace('\n', '').replace('<br>', '\n')

View File

@@ -0,0 +1,25 @@
# Switched off: the explanation follows in the string below.
enabled=False
"""
Lyrics fandom has been deleted with no notice and no reason given, even when asked directly.
It will never be reinstated, this file will never work again. It can be safely deleted.
It's still included as a reminder of one of the nice things we just can't have thanks to, probably, copyright.
"""
def lyrics(band, song):
    """Fetch a song's lyrics from the lyrics.fandom.com parse API.

    The page ``"{band}:{song}"`` is requested as wikitext. Redirect pages are
    followed by re-requesting the first ``[[...]]`` link target until the
    wikitext no longer contains ``#redirect``.

    Returns the text between the ``<lyrics>`` and ``</lyrics>`` tags, or the
    string ``"Couldn't find it boi"`` when the first response lacks the
    expected JSON keys.
    """
    endpoint = 'https://lyrics.fandom.com/api.php'
    query = {'format':'json', 'action':'parse', 'prop':'wikitext', 'page':f'{band}:{song}'}
    # Remote lookup
    response = r.get(endpoint, params=query)
    try:
        wikitext = response.json()['parse']['wikitext']['*']
    except KeyError:
        return "Couldn't find it boi"
    # Follow redirects
    while '#redirect' in wikitext.lower():
        target = wikitext[wikitext.index('[[') + 2:wikitext.index(']]')]
        query['page'] = target
        wikitext = r.get(endpoint, params=query).json()['parse']['wikitext']['*']
    opening = wikitext.index('<lyrics>') + 8
    closing = wikitext.index('</lyrics>')
    return wikitext[opening:closing]
def url(band, song):
    """Build the wiki page URL and the parse-API URL for a song.

    Returns a ``(wiki_url, api_url)`` tuple of strings. Both URLs address the
    page ``"{band}:{song}"``.

    Names are percent-encoded so that characters such as ``&``, ``=``, ``#``
    or spaces cannot corrupt the URL. ``:`` (and ``/`` in the wiki path) are
    left as they are, so names without special characters give the same URLs
    as plain string formatting would.
    """
    from urllib.parse import quote, urlencode

    page = f'{band}:{song}'
    query = urlencode(
        {'format': 'json', 'action': 'parse', 'prop': 'wikitext', 'page': page},
        safe=':',
    )
    wiki_url = f"https://lyrics.fandom.com/wiki/{quote(page, safe=':/')}"
    api_url = f'https://lyrics.fandom.com/api.php?{query}'
    return wiki_url, api_url