import cookielib
import getpass
import sys
import urllib
import urllib2
import HTMLParser
import time

def get_page(urlOpener, url, data = None, retries = 10, delay = 10):
	"""Fetch `url` through `urlOpener` and return (at most 1 MiB of) its body.

	urlOpener -- object with an open(request, data) method, e.g. the result
	             of urllib2.build_opener().
	url       -- address to fetch.
	data      -- optional url-encoded body; passed straight to open().
	retries   -- how many times a failed open() is retried (so up to
	             retries + 1 attempts are made).
	delay     -- seconds to sleep between attempts.

	Any exception raised by open() counts as a failed attempt. Once the
	retries are used up an Exception naming the url is raised.
	"""
	request = urllib2.Request(url)
	attempts_left = retries
	while True:
		try:
			# keep the handle in its own name so `url` stays available
			# for the error message below
			response = urlOpener.open(request, data)
		except Exception:
			if attempts_left <= 0:
				raise Exception("Failed to fetch url %s" % (url))
			attempts_left -= 1
			time.sleep(delay)
		else:
			break

	try:
		# read is capped at 1 MiB; anything beyond that is discarded
		return response.read(1024 * 1024)
	finally:
		response.close()

def alist_to_dict(alist):
	"""Turn a sequence of (key, value, ...) entries into a dictionary.

	Only the first two items of each entry are used; when a key occurs
	more than once the last entry wins.
	"""
	return dict((entry[0], entry[1]) for entry in alist)

class WebLoginRedirectHTMLParser(HTMLParser.HTMLParser):
	"""Find the target of the <a id="continue_link"> anchor in a page.

	After feed(), get_redirect() returns that anchor's href, or "" when no
	such anchor (with a usable href) was seen. If several anchors match,
	the last one wins.
	"""

	def __init__(self):
		HTMLParser.HTMLParser.__init__(self)
		self.redirect = ""

	def handle_starttag(self, tag, attrs):
		if tag != "a":
			return
		d = dict(attrs)
		if d.get("id") != "continue_link":
			return
		# an anchor without href (or with a valueless one) must not raise
		# or clobber a redirect that was already found
		href = d.get("href")
		if href:
			self.redirect = href

	def get_redirect(self):
		"""Return the href found so far ("" if none)."""
		return self.redirect

class WebLoginHTMLParser(HTMLParser.HTMLParser):
	"""Collect the hidden form fields RT, ST and LC from a page.

	After feed(), get_hiddens() returns a dict with exactly those three
	keys; a field that was not found keeps the value "".
	"""

	def __init__(self):
		HTMLParser.HTMLParser.__init__(self)
		self.crud = { "RT" : "", "ST" : "", "LC" : "" }

	def handle_starttag(self, tag, attrs):
		if tag != "input":
			return
		d = dict(attrs)
		# <input> tags may lack type/name/value altogether; use .get() so
		# such tags are skipped instead of raising KeyError
		if d.get("type") != "hidden":
			return
		name = d.get("name")
		value = d.get("value")
		if name in self.crud and value is not None:
			self.crud[name] = value

	def get_hiddens(self):
		"""Return the dict of collected hidden fields."""
		return self.crud

class HTMLCrudParser(HTMLParser.HTMLParser):
	"""Gather the pieces of a page the scraper cares about.

	While parsing it records:
	  - the href of every <a> tag that has one (get_urls),
	  - the attribute dict of every <input> tag that has an id (get_inputs),
	  - the text found inside <title>, each chunk stripped (get_title).
	"""

	def __init__(self):
		HTMLParser.HTMLParser.__init__(self)
		self.urls = []
		self.inputs = []
		self.title = ""
		self.on_title = False

	def handle_starttag(self, tag, attrs):
		if tag == "title":
			self.on_title = True
			return
		if tag not in ("a", "input"):
			return

		attributes = dict(attrs)
		if tag == "a":
			# first attribute whose name is "href", compared case-insensitively
			hrefs = [key for key in attributes if key.lower() == "href"]
			if hrefs:
				self.urls.append(attributes[hrefs[0]])
		elif any(key.lower() == "id" for key in attributes):
			# keep the whole attribute dict of inputs carrying an id
			self.inputs.append(attributes)

	def handle_endtag(self, tag):
		if tag == "title":
			self.on_title = False

	def handle_data(self, data):
		if not self.on_title:
			return
		self.title += data.strip()

	def get_urls(self):
		"""Return the list of collected hrefs, in document order."""
		return self.urls

	def get_inputs(self):
		"""Return the list of attribute dicts of inputs that have an id."""
		return self.inputs

	def get_title(self):
		"""Return the accumulated title text."""
		return self.title

class Course():
	"""A course: its name, its number and the lectures added to it."""

	def __init__(self, name, number):
		self.lectures = []
		self.number = number
		self.name = name

	def get_name(self):
		"""Return the name given at construction."""
		return self.name

	def get_number(self):
		"""Return the number given at construction."""
		return self.number

	def get_lectures(self):
		"""Return the (live) list of lectures, in the order they were added."""
		return self.lectures

	def add_lecture(self, lecture):
		"""Append `lecture` to the end of the lecture list."""
		self.lectures += [lecture]

# Ask for the credentials. Prompts and progress messages are written to
# stderr; the login name is read from stdin and the password via getpass.
print >> sys.stderr, "--> SUNET authentication required:"
print >> sys.stderr, "---> Login: ",
login = sys.stdin.readline().strip()
# carriage return; presumably rewinds the cursor so the next message starts
# at the beginning of the line -- TODO confirm
print >> sys.stderr, "\r",
password = getpass.getpass("---> Password: ", sys.stderr)
print >> sys.stderr, "\r",

# set up cookies
# (every request below goes through this opener, so cookies set by one
# response are sent with the following requests)
cookies = cookielib.CookieJar()
urlOpener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookies))

# request myvideosu, which redirects to stanford weblogin
# the login form's hidden RT/ST/LC fields are extracted from the returned page
page = get_page(urlOpener, "https://myvideosu.stanford.edu")
webloginparser = WebLoginHTMLParser()
webloginparser.feed(page)
hiddens = webloginparser.get_hiddens()

# issue username / password to stanford weblogin; we'll be re-directed to SCPD's page
# the hidden fields collected above are posted back together with the credentials
data = urllib.urlencode({ "username" : login, "password" : password, "login" : "yes", "RT" : hiddens["RT"], "ST" : hiddens["ST"], "LC" : hiddens["LC"] })
page = get_page(urlOpener, "https://weblogin.stanford.edu/login/login", data) 

# the login counts as successful only if a cookie named webauth_wpt_krb5
# ended up in the jar
if not 'webauth_wpt_krb5' in [cookie.name for cookie in cookies]:
	print >> sys.stderr, "--> Login failed."
	sys.exit(1)

print >> sys.stderr, "--> Logged in."

# parse the redirect
redirparser = WebLoginRedirectHTMLParser()
redirparser.feed(page)
redirect = redirparser.get_redirect()

# Without a continue link there is nothing to fetch. Stop here rather than
# hand an empty URL to get_page, which would retry it (sleeping between
# attempts) before finally giving up.
if not redirect:
	print >> sys.stderr, "--> Error: no redirect link found after login."
	sys.exit(1)

# parse SCPD's main page
page = get_page(urlOpener, redirect)
crud = HTMLCrudParser()
crud.feed(page)

print >> sys.stderr, "--> Loading Quarter \"%s\"" % (crud.get_title())

courses = []

# for each course, load its page to get the lectures
# for each lecture link on that page, load the player page to grab the mms url
#   meetings are in reverse-chronological order
print >> sys.stderr, "--> Loading courses..."
for course_href in crud.get_urls():
	# an <a href> without a value is reported as None; skip it instead of
	# crashing on the substring test
	if not course_href or "GradCourseInfo.aspx" not in course_href:
		continue

	course_page = get_page(urlOpener, "https://myvideosu.stanford.edu" + course_href)
	crud2 = HTMLCrudParser()
	crud2.feed(course_page)

	# The title is split at its first space into number and name.
	# partition() keeps a title without any space intact as the number
	# (find() would return -1 there and chop off the last character).
	number, _sep, name = crud2.get_title().partition(" ")
	number = number.strip()
	name = name.strip()
	print >> sys.stderr, "---> %s (%s)" % (number, name)
	course = Course(name, number)

	# counts advertised by the page itself, used below as a sanity check
	meetings = int(course_page.split("Course Meetings: (")[1].split(")")[0])
	novideos = course_page.count("No video")

	for link in crud2.get_urls():
		if not link or "openslplayer.aspx" not in link or "wmp=true" not in link:
			continue
		# the player address is the first single-quoted string in the link
		player_url = link.split("'")[1]
		crud3 = HTMLCrudParser()
		crud3.feed(get_page(urlOpener, player_url))

		# the last input with id "video_URL" supplies the mms url
		mms = None
		for field in crud3.get_inputs():
			if field.get("id") == "video_URL":
				mms = field.get("value")
		if mms is None:
			print >> sys.stderr, "---> Error: missing mms url for %s" % (player_url)
		# the lecture is recorded even when its url is missing (as None)
		course.add_lecture(mms)

	found = len(course.get_lectures())
	print >> sys.stderr, "----> %d lectures" % (found)
	if (meetings - novideos) != found:
		print >> sys.stderr, "-----> WARNING: MAY NOT HAVE OBTAINED ALL LECTURES! (page says %d, found %d, no videos %d)" % (meetings, found, novideos)
	courses.append(course)

print >> sys.stderr, "--> Dumping courses..."

for c in courses:
	number = c.get_number()

	# reverse-chronological order
	i = 0
	lectures = c.get_lectures()
	lectures.reverse()
	for l in lectures:
		print "%s\t%d\t%s" % (number, i, l)
		i += 1
