lyrics-fetcher/sources/com_bandcamp.py

73 lines
2.6 KiB
Python
Raw Normal View History

2021-11-17 22:44:26 +11:00
# Module-level on/off flag for this lyrics source.
# NOTE(review): presumably read by the fetcher's source loader to decide
# whether to use this module -- confirm against the loader.
enabled=True
import requests as r
def processneedle(gunk: str, start: int, needle: tuple):
    """Extract the text between a pair of delimiters, searching from ``start``.

    Args:
        gunk: The text to search.
        start: Offset in ``gunk`` at which the search begins.
        needle: Sequence whose first two items are the opening and closing
            delimiter strings. Any further items are ignored here.

    Returns:
        ``(captured, next_start)`` where ``captured`` is the text between the
        delimiters and ``next_start`` is the offset just past the closing
        delimiter. ``(None, None)`` if either delimiter cannot be found; a
        ``failed needle:`` diagnostic is printed in that case.
    """
    opener, closer = needle[0], needle[1]

    opener_at = gunk.find(opener, start)
    if opener_at == -1:
        print('failed needle:', needle)
        return None, None
    begin = opener_at + len(opener)

    # The closing delimiter is searched for one character past the opening
    # delimiter, so the captured text always holds at least one character.
    closer_at = gunk.find(closer, begin + 1)
    if closer_at == -1:
        # Previously this case escaped as an uncaught ValueError; report it
        # the same way as a missing opening delimiter.
        print('failed needle:', needle)
        return None, None

    return gunk[begin:closer_at], closer_at + len(closer)
def stringiter(gunk:str,needle:tuple=('',''),needles:list=None):
    """Generate successive delimiter-bounded captures from ``gunk``.

    Two modes are supported:

    * Batch mode (``needles`` is a non-empty list): every pass applies each
      needle in order, continuing from where the previous one stopped, and
      yields one dict per pass. A needle with a third item stores its capture
      under that item as the key; a needle without one has its capture
      appended to a list stored under the ``''`` key. Iteration ends as soon
      as any needle fails, and the partially filled dict for that pass is
      discarded.
    * Single-needle mode (``needles`` empty or omitted): yields each capture
      of ``needle`` as a plain string until it no longer matches. If
      ``needle`` is left at its default ``('', '')``, the generator first
      yields ``None`` and takes the needle to use from the value passed to
      ``send()``.

    Args:
        gunk: The text to scan.
        needle: ``(opening, closing[, key])`` used in single-needle mode.
        needles: List of needle tuples for batch mode.
    """
    start = 0

    if needles:
        # Batch mode: all needles are known upfront.
        while True:
            record = {}
            for spec in needles:
                res, start = processneedle(gunk, start, spec)
                if res is None:
                    return
                if len(spec) > 2:
                    record[spec[2]] = res
                else:
                    record.setdefault('', []).append(res)
            yield record

    # Single-needle (interactive) mode.
    if needle == ('', ''):
        needle = yield None
    while True:
        res, start = processneedle(gunk, start, needle)
        if res is None:
            return
        yield res
def bandget(band):
    """Fetch a band's Bandcamp landing page and scrape its album list.

    Args:
        band: Subdomain used to build ``https://{band}.bandcamp.com/``.

    Returns:
        A list of dicts, one per scraped album, each with the keys
        ``urlid``, ``coverurl`` and ``title``.
    """
    # The response is assumed to be a success; no status check or error
    # handling is done yet. This scrapes HTML because no real API is used.
    page = r.get(f'https://{band}.bandcamp.com/').content.decode('utf-8')
    album_needles = [
        ('href="/album/', '"', 'urlid'),
        ('img src="', '"', 'coverurl'),
        ('class="title">\n ', '\n', 'title'),
    ]
    return list(stringiter(page, needles=album_needles))
def albumget(band,album,mode=0):
    """Fetch one album page and scrape its track listing.

    Args:
        band: Subdomain used to build the ``{band}.bandcamp.com`` host.
        album: Album identifier placed after ``/album/`` in the URL.
        mode: Accepted but not read by the current implementation; only the
            track list is returned.

    Returns:
        A list of dicts, one per scraped track, each with the keys ``num``,
        ``urlid``, ``title`` and ``duration``.
    """
    page = r.get(f'https://{band}.bandcamp.com/album/{album}').content.decode('utf-8')
    track_needles = [
        ('rel="tracknum=', '"', 'num'),
        ('a href="/track/', '">', 'urlid'),
        ('span class="track-title">', '</span>', 'title'),
        ('<span class="time secondaryText">\n \n ', '\n', 'duration'),
    ]
    # Lyrics are present in the page but are deliberately not scraped here:
    # with this needle scheme a track without lyrics would silently drop
    # every track after it, up to and including the next one with lyrics.
    return list(stringiter(page, needles=track_needles))
def index(band,album=''):
    """Return track listings for one album or for every album of a band.

    Args:
        band: Band identifier passed through to ``bandget``/``albumget``.
        album: Album identifier; when empty, all albums are indexed.

    Returns:
        The ``albumget`` result for ``album`` when it is given; otherwise a
        dict mapping each album's ``urlid`` to its ``albumget`` result.
    """
    if not album:
        listing = {}
        for entry in bandget(band):
            slug = entry['urlid']
            listing[slug] = albumget(band, slug, 0)
        return listing
    return albumget(band, album, 0)
def lyrics(song,band='',album=''):
    """Scrape the lyrics of a track from its Bandcamp page.

    Args:
        song: Track name; spaces are replaced with ``-`` to build the URL.
        band: Band name; spaces are removed to build the subdomain. Required.
        album: Accepted but not used.

    Returns:
        The lyrics text with ``<br>`` tags turned into newlines, an
        explanatory message if ``band`` is empty, or an empty string if the
        page contains no lyrics block.
    """
    if not band:
        return "Bandcamp does not currently support song search (due to being a webscraper). You must specify the band that performed the song."
    band = band.replace(' ', '')
    song = song.replace(' ', '-')
    page = r.get(f'https://{band}.bandcamp.com/track/{song}').content.decode('utf-8')
    needle = ('<div class="tralbumData lyricsText">', '</div>', 'lyrics')
    # Give next() a default: without one, a page lacking the lyrics block
    # would leak StopIteration out of this function.
    text = next(stringiter(page, needle=needle), None)
    if text is None:
        return ''
    # Drop the raw line breaks in the markup and keep only the <br> ones.
    return text.replace('\r', '').replace('\n', '').replace('<br>', '\n')