#!/usr/bin/env python

import glob
import os
import subprocess
import sys

import thunder

# Launcher that runs one of thunder's bundled analysis scripts through
# spark-submit, shipping the thunder egg when the master is not local.

USAGE = "usage: thunder-submit <analysis> <opts>"

# file holding the cluster's master URL on a spark-ec2 cluster
CLUSTER_URL_FILE = '/root/spark-ec2/cluster-url'


def is_local_master(master):
    """Return True when *master* is a local master URL.

    Spark's local masters are spelled "local", "local[N]" or "local[*]",
    so the test is a prefix match; a plain substring test would wrongly
    treat cluster URLs such as "spark://localhost:7077" as local.
    """
    return master.startswith("local")


def resolve_master(master, cluster_url_file=CLUSTER_URL_FILE):
    """Return the master URL to pass to spark-submit.

    master -- value of the MASTER environment variable (may be None or '')
    cluster_url_file -- file read for the master URL when MASTER is unset

    An explicit, non-empty MASTER wins.  Otherwise the content of
    *cluster_url_file* is used if that file exists, else "local".
    """
    if master:
        return master
    if os.path.isfile(cluster_url_file):
        with open(cluster_url_file) as handle:
            return handle.read().strip()
    return "local"


def find_egg(script_dir):
    """Return the path of an egg in ../dist relative to *script_dir*.

    If no egg is present, the 'build' script next to this file is run once
    and the directory is searched again.  Raises Exception when there is
    still no egg, instead of failing later with an IndexError.
    """
    dist = os.path.join(script_dir, '../dist')
    pattern = os.path.join(dist, "*.egg")

    eggs = glob.glob(pattern)
    if not eggs:
        # run through the shell exactly as before
        os.system(os.path.join(script_dir, 'build'))
        eggs = glob.glob(pattern)

    if not eggs:
        raise Exception('could not find or build an egg in ' + dist)
    return eggs[0]


def build_submit_command(spark, master, analysis, opts, egg=None):
    """Return the spark-submit invocation as an argument list.

    spark -- Spark installation directory (SPARK_HOME)
    master -- master URL
    analysis -- path of the analysis script to run
    opts -- list of arguments forwarded to the analysis script
    egg -- optional egg to ship with --py-files

    spark-submit's documented form is
    `spark-submit [options] <app> [app arguments]`, so --py-files is placed
    before the analysis script rather than after it.
    """
    command = [os.path.join(spark, 'bin/spark-submit'), '--master', master]
    if egg is not None:
        command.extend(['--py-files', egg])
    command.append(analysis)
    command.extend(opts)
    return command


def main(argv):
    """Run the requested analysis and return the process exit status."""
    # get arguments
    args = argv[1:]
    if len(args) < 2:
        sys.stderr.write(USAGE + "\n")
        return -1
    thunderdir = os.path.dirname(os.path.realpath(thunder.__file__))
    analysis = os.path.join(thunderdir, args[0] + ".py")
    opts = args[1:]

    # check for SPARK_HOME
    spark = os.getenv('SPARK_HOME')
    if not spark:
        raise Exception('must assign the environmental variable SPARK_HOME with the location of Spark')

    # check the name of the master, set explicitly to local if empty,
    # unless we're on ec2
    master = resolve_master(os.getenv('MASTER'))

    # if not running locally, check whether egg exists, and if not, build it
    egg = None
    if not is_local_master(master):
        calldir = os.path.dirname(os.path.realpath(__file__))
        egg = find_egg(calldir)

    # pass an argument list (no shell) so options containing spaces or
    # shell metacharacters reach the analysis unchanged
    command = build_submit_command(spark, master, analysis, opts, egg)
    return subprocess.call(command)


if __name__ == "__main__":
    sys.exit(main(sys.argv))
