lyrics-fetcher/sources/com_bandcamp.py

# Module-level on/off flag for this lyrics source.
# NOTE(review): presumably read by whatever loads the modules under sources/ to
# decide whether Bandcamp is queried - confirm against the loader.
enabled=True

import requests as r

def processneedle(gunk,start,needle):
	"""Extract the next substring of ``gunk`` enclosed by a pair of delimiters.

	gunk:   the text to search.
	start:  offset in ``gunk`` at which the search begins.
	needle: sequence whose first item is the opening delimiter and whose
	        second item is the closing delimiter (further items are ignored here).

	Returns ``(text, next_start)`` where ``text`` is what lies between the two
	delimiters and ``next_start`` is the offset just past the closing delimiter.
	Returns ``(None, None)`` and prints a 'failed needle:' line when either
	delimiter cannot be found at or after ``start``.
	"""
	opener,closer=needle[0],needle[1]
	# Search by offset rather than slicing, so the remaining text is not copied on every call.
	found=gunk.find(opener,start)
	if found<0:
		print('failed needle:',needle)
		return None,None
	begin=found+len(opener)
	# The closer is looked for one character past ``begin``, so a match always
	# contains at least one character and the scan position always advances.
	stop=gunk.find(closer,begin+1)
	if stop<0:
		# A missing closing delimiter is reported the same way as a missing
		# opening one, instead of escaping as an uncaught ValueError.
		print('failed needle:',needle)
		return None,None
	return gunk[begin:stop],stop+len(closer)

def stringiter(gunk:str,needle:tuple=('',''),needles:list=None):
	"""Generator that scans ``gunk`` left to right, pulling out delimited substrings.

	Non-interactive mode (``needles`` is a non-empty list):
		Every pass applies each needle in order through ``processneedle`` and
		yields one dict per pass. A needle's third item, when present, is the
		dict key for its match; matches of two-item needles are gathered in a
		list stored under the key ''. The generator finishes as soon as any
		needle fails, and the partially filled dict of that pass is discarded.

	Interactive mode (``needles`` empty or omitted):
		Yields each successive match of ``needle`` until one fails. If
		``needle`` is left at its default ('',''), the generator first yields
		None and takes the real needle from the value passed to ``send()``.
	"""
	start=0
	if needles:
		# Non-interactive mode. You have all the info upfront.
		while True:
			out={}
			for spec in needles:
				res,start=processneedle(gunk,start,spec)
				if res is None and start is None: return
				if len(spec)>2: out[spec[2]]=res
				else: out.setdefault('',[]).append(res)
			yield out
	# Interactive mode. I guess don't use this as much.
	if needle==('',''): needle=yield None
	while True:
		res,start=processneedle(gunk,start,needle)
		if res is None and start is None: return
		yield res

def bandget(band):
	"""Fetch a band's Bandcamp front page and scrape its album entries.

	band: subdomain used to build https://<band>.bandcamp.com/.

	Returns a list of dicts, one per scraped entry, each holding the keys
	'urlid', 'coverurl' and 'title' as produced by ``stringiter``.
	"""
	album_needles=[
		('href="/album/','"','urlid'),
		('img src="','"','coverurl'),
		('class="title">\n            ','\n','title'),
	]
	# The request is simply assumed to succeed; error handling is still to be
	# written, and a real API would be preferable to scraping.
	response=r.get(f'https://{band}.bandcamp.com/')
	page=response.content.decode('utf-8')
	return list(stringiter(page,needles=album_needles))

def albumget(band,album,mode=0):
	"""Fetch an album page and scrape its track list.

	band:  subdomain used to build the URL.
	album: album id placed after /album/ in the URL.
	mode:  accepted but not read anywhere in this function; the track list is
	       what gets returned regardless of its value.

	Returns a list of dicts, one per scraped track, each holding the keys
	'num', 'urlid', 'title' and 'duration' as produced by ``stringiter``.
	"""
	track_needles=[
		('rel="tracknum=','"','num'),
		('a href="/track/','">','urlid'),
		('span class="track-title">','</span>','title'),
		('<span class="time secondaryText">\n        \n            ','\n','duration'),
	]
	url=f'https://{band}.bandcamp.com/album/{album}'
	page=r.get(url).content.decode('utf-8')
	# Lyrics are present in the page too, but scraping them with this needle
	# scheme is unsafe: a track without lyrics would silently drop every track
	# after it, up to and including the next track that does have lyrics.
	return list(stringiter(page,needles=track_needles))

def index(band,album=''):
	"""Return track listings for one album or for every album of a band.

	With a non-empty ``album`` the result is that album's track list from
	``albumget``. Otherwise every entry returned by ``bandget`` is looked up
	in turn and the result is a dict mapping each album's 'urlid' to its
	track list.
	"""
	if not album:
		listing={}
		for entry in bandget(band):
			slug=entry['urlid']
			listing[slug]=albumget(band,slug,0)
		return listing
	return albumget(band,album,0)

def lyrics(song,band='',album=''):
	"""Fetch the lyrics of a track from its Bandcamp track page.

	song:  track name; spaces are replaced with '-' to build the URL.
	band:  band name; spaces are removed to build the subdomain. Required.
	album: accepted but not used by this function.

	Returns the lyrics as a string with '<br>' tags turned into newlines.
	Returns an explanatory message when ``band`` is empty, and an empty
	string when the page contains no lyrics block.
	"""
	if not band:
		return "Bandcamp does not currently support song search (due to being a webscraper). You must specify the band that performed the song."
	band=band.replace(' ','')
	song=song.replace(' ','-')
	peg=r.get(f'https://{band}.bandcamp.com/track/{song}').content.decode('utf-8')
	needle=('<div class="tralbumData lyricsText">','</div>','lyrics')
	# Give next() a default so a page without a lyrics block does not leak
	# StopIteration out of this function.
	raw=next(stringiter(peg,needle=needle),None)
	if raw is None:
		return ''
	# Drop the page's own line breaks first, then turn the HTML breaks into real ones.
	return raw.replace('\r','').replace('\n','').replace('<br>','\n')
initcommit, I'll add .gitignore later 2021-11-17 22:44:26 +11:00			`enabled=True`

			`import requests as r`

			`def processneedle(gunk,start,needle):`
			`try: start+=gunk[start:].index(needle[0])+len(needle[0])`
			`except ValueError: print('failed needle:',needle); return None,None`
			`end=gunk[start+1:].index(needle[1])+1`
			`return gunk[start:start+end],start+end+len(needle[1])`

			`def stringiter(gunk:str,needle:tuple=('',''),needles:list=[]):`
			`start=0`
			`if needles:`
			`# Non-interactive mode. You have all the info upfront.`
			`needle=0`
			`while True:`
			`out={}`
			`for needle in needles:`
			`res,start=processneedle(gunk,start,needle)`
			`if res==start==None: return`
			`try: out[needle[2]]=res`
			`except IndexError:`
			`try: out[''].append(res)`
			`except KeyError: out['']=[res]`
			`yield out`
			`# Interactive mode. I guess don't use this as much.`
			`if needle==('',''): needle=yield None`
			`while True:`
			`res,start=processneedle(gunk,start,needle)`
			`if res==start==None: return`
			`yield res`

			`def bandget(band):`
			`peg=r.get(f'https://{band}.bandcamp.com/').content.decode('utf-8') # Just assume success,`
			`# I'll put error handling in later`
			`# I'll also demand a real API later`
			`albums=[]`
			`needles=[('href="/album/','"','urlid'),`
			`('img src="','"','coverurl'),`
			`('class="title">\n ','\n','title')]`
			`for needle in stringiter(peg,needles=needles):`
			`albums.append(needle)`
			`return albums`

			`def albumget(band,album,mode=0):`
			`# Returns track list or all tracks' lyrics, based on mode`
			`peg=r.get(f'https://{band}.bandcamp.com/album/{album}').content.decode('utf-8')`
			`tracks=[]`
			`needles=[('rel="tracknum=','"','num'),`
			`('a href="/track/','">','urlid'),`
			`('span class="track-title">','</span>','title'),`
			`('<span class="time secondaryText">\n \n ','\n','duration')]`
			`# Despite the lyrics being in the pages, it's not actually safe to get them with this system because any track without lyrics will quietly delete all tracks after it, up to and including the next one with lyrics.`
			`for needle in stringiter(peg,needles=needles):`
			`tracks.append(needle)`
			`return tracks`

			`def index(band,album=''):`
			`if album:`
			`return albumget(band,album,0)`
			`albums=bandget(band)`
			`return {x:albumget(band,x,0) for x in map(lambda x:x['urlid'],albums)}`

			`def lyrics(song,band='',album=''):`
			`if not (band):`
			`return "Bandcamp does not currently support song search (due to being a webscraper). You must specify the band that performed the song."`
			`band=band.replace(' ',''); song=song.replace(' ','-')`
			`peg=r.get(f'https://{band}.bandcamp.com/track/{song}').content.decode('utf-8')`
			`needle=('<div class="tralbumData lyricsText">','</div>','lyrics')`
			`lyrics=next(stringiter(peg,needle=needle))`
			`lyrics=lyrics.replace('\r','').replace('\n','').replace('<br>','\n')`
			`return lyrics`