Module maven_iuvs.download
Expand source code
import os
import glob
import subprocess
import time
import tempfile
import datetime
from getpass import getpass
# twill's __init__.py is dumb, we need to work around it to play nice
# with jupyter:
import sys
_stdout = sys.stdout
_stderr = sys.stdout
import twill
twill.set_output(_stdout)
twill.set_errout(_stderr)
twill.set_loglevel(twill.loglevels['WARNING'])
import pexpect
import paramiko
import numpy as np
from maven_iuvs.miscellaneous import clear_line
from maven_iuvs.search import get_latest_files
def get_user_paths_filename():
"""
Determines whether user_paths.py exists and returns the filename
if it does.
Parameters
----------
none
Returns
-------
file_exists : bool
Whether user_paths.py exists
user_paths_py : str
Absolute file path to user_paths.py
"""
pyuvs_path = os.path.dirname(os.path.realpath(__file__))
user_paths_py = os.path.join(pyuvs_path, "user_paths.py")
file_exists = os.path.exists(user_paths_py)
return file_exists, user_paths_py
def setup_user_paths():
"""
Generates user_paths.py, used by sync_data to read data from the
IUVS VM and store it locally
Parameters
----------
none
Returns
-------
none
Notes
-------
This is an interactive routine called once, generally the first
time the user calls sync_data.
"""
# if user_paths.py already exists then assume that the user has
# set everything up already
file_exists, user_paths_py = get_user_paths_filename()
if file_exists:
return
# get the location of the default L1B and SPICE directory
print("Syncing all of the L1B data could take up to 2TB of disk space.")
l1b_dir = input("Where would you like IUVS l1b FITS files"
" to be stored by sync_data? ")
print("Syncing all of the SPICE kernels could take up to 300GB of disk"
" space.")
spice_dir = input("Where would you like MAVEN/IUVS SPICE"
" kernels to be stored by sync_data? ")
# get the VM username to be used in rsync calls
vm_username = input("What is your username for the"
" IUVS VM to sync files? ")
user_paths_file = open(user_paths_py, "x")
user_paths_file.write("# This file automatically generated by"
" maven_iuvs.download.setup_file_paths\n")
user_paths_file.write("l1b_dir = \""+l1b_dir+"\"\n")
user_paths_file.write("spice_dir = \""+spice_dir+"\"\n")
user_paths_file.write("iuvs_vm_username = \""+vm_username+"\"\n")
user_paths_file.close()
# now scripts can import the relevant directories from user_paths
def call_rsync(remote_path,
local_path,
ssh_password,
extra_flags=""):
"""
Updates the SPICE kernels by rsyncing the VM folders to the local machine.
Parameters
----------
remote_path : str
Path to sync on the remote machine.
local_path : str
Path to the sync on the local machine.
ssh_password : str
Plain text to send to process when it prompts for a password
extra_flags : str
Extra flags for rsync command.
-trzL and -info=progress2 are already specified, extra_flags
text are inserted afterward. Defaults to "".
Returns
-------
none
"""
# get the version number of rsync
try:
result = subprocess.run(['rsync', '--version'],
stdout=subprocess.PIPE,
check=True)
version = result.stdout.split(b'version')[1].split()[0]
version = int(version.replace(b".", b""))
except subprocess.CalledProcessError:
raise Exception("rsync failed ---"
" is rsync installed on your system?")
if version >= 313:
# we can print total transfer progress
progress_flag = '--info=progress2'
else:
progress_flag = '--progress'
rsync_command = " ".join(['rsync -trvzL',
progress_flag,
extra_flags,
remote_path,
local_path])
print("running rsync_command: " + rsync_command)
child = pexpect.spawn(rsync_command,
encoding='utf-8')
cpl = child.compile_pattern_list(['.* password: ',
'[0-9]+%'])
child.expect_list(cpl)
if 'password' in child.after:
# respond to server password request
child.sendline(ssh_password)
# print some progress info by searching for lines with a
# percentage progress
cpl = child.compile_pattern_list([pexpect.EOF,
'[0-9]+%'])
while True:
i = child.expect_list(cpl, timeout=None)
if i == 0: # end of file
break
if i == 1:
percent = child.after.strip(" \t\n\t")
# get file left to check also
child.expect('[0-9]+/[0-9]+', timeout=None)
file_numbers = child.after
if version < 313:
# compute progress from file numbers
fnum1, fnum2 = list(map(int, file_numbers.split("/")))
percent = 1.0 - fnum1 / fnum2
percent = str(int(percent*100)) + "%"
clear_line()
print("rsync progress: " +
percent +
' (files left: ' + file_numbers + ')',
end='\r')
child.close()
clear_line() # clear last rsync message
def get_vm_file_list(server,
serverdir,
username,
password,
pattern="*.fits*",
minorb=100, maxorb=100000,
include_cruise=False,
status_tag=""):
"""
Get a list of files from the VM that match a given pattern.
Parameters
----------
server : str
name of the server to get files from (normally maven-iuvs-itf)
serverdir : str
directory to search for files matching the pattern
username : str
username for server access
password : str
password for server access
pattern : str
glob pattern used to search for matching files
Defaults to '*.fits*' (matches all FITS files)
minorb, maxorb : int
Minimum and maximum orbit numbers to sync from VM, in multiples of 100.
Defaults to 100 and 100000, but smaller ranges than the available data
will sync faster.
include_cruise : bool
Whether to sync cruise data in addition to the orbit range above.
Defaults to False.
status_tag : str
Tag to decorate orbit number print string and inform user of progress.
Defaults to "".
Returns
-------
files : np.array
list of server filenames that match the pattern
"""
# connect to the server using paramiko
ssh = paramiko.SSHClient()
ssh.load_system_host_keys()
ssh.connect(server, username=username, password=password)
# get the list of folders on the VM
stdin, stdout, stderr = ssh.exec_command('ls '+serverdir)
server_orbit_folders = np.loadtxt(stdout, dtype=str)
# determine what folders to look for files in
sync_orbit_folders = ["orbit"+str(orbno).zfill(5)
for orbno in np.arange(minorb, maxorb, 100)]
if include_cruise:
sync_orbit_folders = np.append(["cruise"], sync_orbit_folders)
# sync only folders that belong to both groups
sync_orbit_folders = server_orbit_folders[np.isin(server_orbit_folders,
sync_orbit_folders,
assume_unique=True)]
# set up the output files array
files = []
# iterate through the folder list and get the filenames that match
# the input pattern
for folder in sync_orbit_folders:
clear_line()
print(status_tag+folder, end="\r")
cmd = "ls "+serverdir+folder+"/"+pattern
stdin, stdout, stderr = ssh.exec_command(cmd)
if len(stderr.read()) == 0:
files.append(np.loadtxt(stdout, dtype=str))
else:
continue
ssh.close()
if len(files) == 0:
return []
else:
return np.concatenate(np.array(files, dtype=object))
def sync_data(spice=True, l1b=True,
pattern="*.fits*",
minorb=100, maxorb=100000,
include_cruise=False):
"""
Synchronize new SPICE kernels and L1B data from the VM and remove
any old files that have been replaced by newer versions.
Parameters
----------
spice : bool
Whether or not to sync SPICE kernels. Defaults to True.
l1b : bool
Whether or not to sync level 1B data. Defaults to True.
pattern : str
glob pattern used to search for matching files
Defaults to '*.fits*' (matches all FITS files)
minorb, maxorb : int
Minimum and maximum orbit numbers to sync from VM, in multiples of 100.
Defaults to 100 and 100000, but smaller ranges than the available data
will sync faster.
include_cruise : bool
Whether to sync cruise data in addition to the orbit range above.
Defaults to False.
Returns
-------
None.
"""
# check if user path data exists and set it if not
setup_user_paths()
# load user path data from file
from maven_iuvs.user_paths import l1b_dir, spice_dir, iuvs_vm_username
if not os.path.exists(spice_dir):
raise Exception("Cannot find specified SPICE directory."
" Is it accessible?")
if not os.path.exists(l1b_dir):
raise Exception("Cannot find specified L1B directory."
" Is it accessible?")
# get starting time
t0 = time.time()
# define VM-related variables
vm = 'maven-iuvs-itf'
login = iuvs_vm_username + '@' + vm + ':'
production_l1b = '/maven_iuvs/production/products/level1b/'
stage_l1b = '/maven_iuvs/stage/products/level1b/'
vm_spice = login + '/maven_iuvs/stage/anc/spice/'
# try to sync the files, if it fails, user probably isn't on the VPN
try:
# get user password for the VM
iuvs_vm_password = getpass('input password for '+login+' ')
# sync SPICE kernels
if spice is True:
print('Updating SPICE kernels...')
call_rsync(vm_spice, spice_dir, iuvs_vm_password,
extra_flags="--delete")
# sync level 1B data
if l1b is True:
# get the file names of all the relevant files
print('Fetching names of level 1B production and stage'
' files from the VM...')
prod_filenames = get_vm_file_list(vm,
production_l1b,
iuvs_vm_username,
iuvs_vm_password,
pattern=pattern,
minorb=minorb,
maxorb=maxorb,
include_cruise=include_cruise,
status_tag='production: ')
stage_filenames = get_vm_file_list(vm,
stage_l1b,
iuvs_vm_username,
iuvs_vm_password,
pattern=pattern,
minorb=minorb,
maxorb=maxorb,
include_cruise=include_cruise,
status_tag='stage: ')
local_filenames = glob.glob(l1b_dir+"/*/"+pattern)
# get the list of most recent files, no matter where they are
# order matters! putting local_filenames first ensures
# duplicates aren't transferred
if (len(prod_filenames) == 0 and len(stage_filenames) == 0):
print("No matching files on VM")
return
files_to_sync = get_latest_files(np.concatenate([local_filenames,
prod_filenames,
stage_filenames]))
# figure out which files to get from production and stage
files_from_production = [a[len(production_l1b):]
for a in files_to_sync
if (a[:len(production_l1b)]
==
production_l1b)]
files_from_stage = [a[len(stage_l1b):]
for a in files_to_sync
if a[:len(stage_l1b)] == stage_l1b]
# production
# save the files to rsync to temporary files
# this way rsync can use the files_from flag
transfer_from_production_file = tempfile.NamedTemporaryFile()
np.savetxt(transfer_from_production_file.name,
files_from_production,
fmt="%s")
print('Syncing ' + str(len(files_from_production)) +
' files from production...')
call_rsync(login+production_l1b,
l1b_dir,
iuvs_vm_password,
extra_flags=('--files-from=' +
transfer_from_production_file.name))
# stage, identical to above
transfer_from_stage_file = tempfile.NamedTemporaryFile()
np.savetxt(transfer_from_stage_file.name,
files_from_stage,
fmt="%s")
print('Syncing ' + str(len(files_from_stage)) +
' files from stage...')
call_rsync(login+stage_l1b,
l1b_dir,
iuvs_vm_password,
extra_flags=('--files-from=' +
transfer_from_stage_file.name))
# now delete all of the old files superseded by newer versions
clear_line()
print('Cleaning up old files...')
# figure out what files need to be deleted
local_filenames = glob.glob(l1b_dir+"/*/*.fits*")
latest_local_files = get_latest_files(local_filenames)
local_files_to_delete = np.setdiff1d(local_filenames,
latest_local_files)
# ask if it's OK to delete the old files
while True and len(local_files_to_delete) > 0:
del_files = input('Delete ' +
str(len(local_files_to_delete)) +
' old files? (y/n/p)')
if del_files == 'n':
# don't delete the files
break
if del_files == 'y':
# delete the files
[os.remove(f) for f in local_files_to_delete]
break
if del_files == 'p':
print(local_files_to_delete)
else:
print("Please answer y or n, or p to print the file list.")
# Question for merge manager:
# Kyle's code keeps a list of these deleted files
# in excluded_files.txt --- is this necessary?
# index all local files to speed up later finding
local_filenames = sorted(glob.glob(l1b_dir+"/*/*.fits*"))
np.save(l1b_dir+'/filenames', sorted(local_filenames))
except OSError:
raise Exception('rsync failed --- are you connected to the VPN?')
# get ending time
t1 = time.time()
seconds = t1 - t0
m, s = divmod(seconds, 60)
h, m = divmod(m, 60)
# tell us how long it took
print('Data syncing and cleanup took %.2d:%.2d:%.2d.' % (h, m, s))
def get_euvm_l2b_dir():
"""
Returns the directory where euvm_l2b data should be stored.
Parameters
----------
none
Returns
-------
euvm_l2b_dir : str
Directory to store EUVM L2B files in.
"""
pyuvs_path = os.path.dirname(os.path.realpath(__file__))
user_paths_py = os.path.join(pyuvs_path, "user_paths.py")
if not os.path.exists(user_paths_py):
setup_user_paths()
try:
from maven_iuvs.user_paths import euvm_l2b_dir
except ImportError:
# need to set euvm_l2b_dir
euvm_l2b_dir = input("Where should euvm_l2b data be stored?")
with open(user_paths_py, "a+") as f:
f.write("# This line added by get_euvm_l2b_dir.py\n")
f.write("euvm_l2b_dir = '"+euvm_l2b_dir+"'\n")
return euvm_l2b_dir
def sync_euvm_l2b(sdc_username, sdc_password):
"""
Sync EUVM L2B data file from MAVEN SDC. This deletes all old data
in euvm_l2b_dir and replaces it with a newly downloaded file.
Parameters
----------
sdc_username : str
Web login username for MAVEN SDC Team site.
sdc_password : str
Web login password for MAVEN SDC Team site.
Returns
-------
none
"""
print("syncing EUVM L2B...")
url = 'https://lasp.colorado.edu/maven/data/sci/euv/l2b/'
euvm_l2b_dir = get_euvm_l2b_dir()
# go to the SDC webpage and expect to see a login form
twill.browser.reset()
twill.browser.go(url)
# enter the login info
twill.commands.fv("1", 'username', sdc_username)
twill.commands.fv("1", 'password', sdc_password)
twill.browser.submit()
# load the page now that we're authenticated
twill.browser.go(url)
# find the most recent save file on the page
files = sorted([f.url for f in twill.browser.links if '.sav' in f.url])
most_recent = files[-1]
# navigate to that file
twill.browser.go(url+most_recent)
# delete old EUVM files in the EUVM l2b directory
old_fnames = glob.glob(euvm_l2b_dir+'*l2b*.sav')
[os.remove(f) for f in old_fnames]
# save the new file to disk
fname = euvm_l2b_dir + most_recent
with open(fname, "wb") as file:
file.write(twill.browser.dump)
def get_integrated_reports_dir():
"""
Returns the directory where MAVEN integrated reports files should be
stored.
Parameters
----------
none
Returns
-------
integrated_reports_dir : str
Directory to store MAVEN integrated reports in.
"""
pyuvs_path = os.path.dirname(os.path.realpath(__file__))
user_paths_py = os.path.join(pyuvs_path, "user_paths.py")
if not os.path.exists(user_paths_py):
setup_user_paths()
try:
from maven_iuvs.user_paths import integrated_reports_dir
except ImportError:
# need to set euvm_l2b_dir
integrated_reports_dir = input("Where should MAVEN Integrated Reports"
" data be stored?")
with open(user_paths_py, "a+") as f:
f.write("# This line added by get_integrated_reports_dir.py\n")
f.write("integrated_reports_dir = '"+integrated_reports_dir+"'\n")
return integrated_reports_dir
def sync_integrated_reports(sdc_username, sdc_password, check_old=False):
"""Sync Integrated Reports data from MAVEN Ops page. Syncs all new
files and all files from last 180 days by default.
Parameters
----------
sdc_username : str
Web login username for MAVEN SDC Team site.
sdc_password : str
Web login password for MAVEN SDC Team site.
check_old : bool
Whether to check all files in the integrated_reports_dir
against the server. Defaults to False.
Returns
-------
none
"""
print("syncing Integrated Reports...")
url = ('https://lasp.colorado.edu/ops/maven/team/'
+ 'inst_ops.php?content=msa_ir&show_all')
local_ir_dir = get_integrated_reports_dir()
# go to the SDC webpage and expect to see a login form
twill.browser.reset()
twill.browser.go(url)
# enter the login info
twill.commands.fv("1", 'username', sdc_username)
twill.commands.fv("1", 'password', sdc_password)
twill.browser.submit()
# load the page now that we're authenticated
twill.browser.go(url)
# get the list of integrated report files on the server
server_links = sorted([f for f in twill.browser.links if '.txt' in f.text])
# get the list of local integrated report files
local_files = [os.path.basename(f)
for f in glob.glob(os.path.join(local_ir_dir, '*'))]
if check_old:
# check all the files, not just the ones we don't have
to_download = server_links
else:
# figure out which ones on the server are new
old_time = datetime.datetime.now() - datetime.timedelta(days=180)
old_time = old_time.strftime("%y%m%d")
to_download = [f for f in server_links if ((int(f.text.split("_")[2])
> int(old_time))
or (f.text
not in local_files))]
# download the new files
from lxml.etree import ParserError
for link in to_download:
clear_line()
print(link.text, end="\r")
# modify the page link to a download link
download_link = link.url.replace("inst_ops.php?content=file&file=",
"download-file.php?public/")
# get the binary of the file
try:
twill.browser.go(download_link)
server_binary_data = twill.browser.dump
except ParserError:
# sometimes the files have zero size,
# which results in a ParserError
server_binary_data = b""
# get the local filename
fname = os.path.join(local_ir_dir, link.text)
# look at the local file contents and compare with remote
if os.path.exists(fname):
with open(fname, "rb") as file:
if file.read() == twill.browser.dump:
# file is the same as the server, keep it
continue
# if we're here either the local file doesn't exist
# or it's different from the server copy.
# Either way, download the server version
fname = os.path.join(local_ir_dir, link.text)
with open(fname, "wb") as file:
file.write(server_binary_data)
clear_line()
def sync_sdc(check_old=False):
"""Wrapper routine to sync EUVM L2B data and Integrated Reports from
MAVEN SDC.
Parameters
----------
check_old : bool
Whether to check all files in the integrated_reports_dir
against the server. Defaults to False.
Returns
-------
none
"""
username = input('Username for MAVEN Team SDC: ')
password = getpass('password for '+username+' on MAVEN Team SDC: ')
sync_euvm_l2b(username, password)
sync_integrated_reports(username, password, check_old=check_old)
Functions
def call_rsync(remote_path, local_path, ssh_password, extra_flags='')
-
Updates the SPICE kernels by rsyncing the VM folders to the local machine.
Parameters
remote_path
:str
- Path to sync on the remote machine.
local_path
:str
- Path to the sync on the local machine.
ssh_password
:str
- Plain text to send to process when it prompts for a password
extra_flags
:str
-
Extra flags for rsync command.
-trzL and -info=progress2 are already specified, extra_flags text are inserted afterward. Defaults to "".
Returns
none
Expand source code
def call_rsync(remote_path, local_path, ssh_password, extra_flags=""): """ Updates the SPICE kernels by rsyncing the VM folders to the local machine. Parameters ---------- remote_path : str Path to sync on the remote machine. local_path : str Path to the sync on the local machine. ssh_password : str Plain text to send to process when it prompts for a password extra_flags : str Extra flags for rsync command. -trzL and -info=progress2 are already specified, extra_flags text are inserted afterward. Defaults to "". Returns ------- none """ # get the version number of rsync try: result = subprocess.run(['rsync', '--version'], stdout=subprocess.PIPE, check=True) version = result.stdout.split(b'version')[1].split()[0] version = int(version.replace(b".", b"")) except subprocess.CalledProcessError: raise Exception("rsync failed ---" " is rsync installed on your system?") if version >= 313: # we can print total transfer progress progress_flag = '--info=progress2' else: progress_flag = '--progress' rsync_command = " ".join(['rsync -trvzL', progress_flag, extra_flags, remote_path, local_path]) print("running rsync_command: " + rsync_command) child = pexpect.spawn(rsync_command, encoding='utf-8') cpl = child.compile_pattern_list(['.* password: ', '[0-9]+%']) child.expect_list(cpl) if 'password' in child.after: # respond to server password request child.sendline(ssh_password) # print some progress info by searching for lines with a # percentage progress cpl = child.compile_pattern_list([pexpect.EOF, '[0-9]+%']) while True: i = child.expect_list(cpl, timeout=None) if i == 0: # end of file break if i == 1: percent = child.after.strip(" \t\n\t") # get file left to check also child.expect('[0-9]+/[0-9]+', timeout=None) file_numbers = child.after if version < 313: # compute progress from file numbers fnum1, fnum2 = list(map(int, file_numbers.split("/"))) percent = 1.0 - fnum1 / fnum2 percent = str(int(percent*100)) + "%" clear_line() print("rsync progress: " + percent + ' (files left: ' + file_numbers + ')', end='\r') child.close() clear_line() # clear last rsync message
def get_euvm_l2b_dir()
-
Returns the directory where euvm_l2b data should be stored.
Parameters
none
Returns
euvm_l2b_dir
:str
Directory to store EUVM L2B files in.
Expand source code
def get_euvm_l2b_dir(): """ Returns the directory where euvm_l2b data should be stored. Parameters ---------- none Returns ------- euvm_l2b_dir : str Directory to store EUVM L2B files in. """ pyuvs_path = os.path.dirname(os.path.realpath(__file__)) user_paths_py = os.path.join(pyuvs_path, "user_paths.py") if not os.path.exists(user_paths_py): setup_user_paths() try: from maven_iuvs.user_paths import euvm_l2b_dir except ImportError: # need to set euvm_l2b_dir euvm_l2b_dir = input("Where should euvm_l2b data be stored?") with open(user_paths_py, "a+") as f: f.write("# This line added by get_euvm_l2b_dir.py\n") f.write("euvm_l2b_dir = '"+euvm_l2b_dir+"'\n") return euvm_l2b_dir
def get_integrated_reports_dir()
-
Returns the directory where MAVEN integrated reports files should be stored.
Parameters
none
Returns
integrated_reports_dir
:str
- Directory to store MAVEN integrated reports in.
Expand source code
def get_integrated_reports_dir(): """ Returns the directory where MAVEN integrated reports files should be stored. Parameters ---------- none Returns ------- integrated_reports_dir : str Directory to store MAVEN integrated reports in. """ pyuvs_path = os.path.dirname(os.path.realpath(__file__)) user_paths_py = os.path.join(pyuvs_path, "user_paths.py") if not os.path.exists(user_paths_py): setup_user_paths() try: from maven_iuvs.user_paths import integrated_reports_dir except ImportError: # need to set euvm_l2b_dir integrated_reports_dir = input("Where should MAVEN Integrated Reports" " data be stored?") with open(user_paths_py, "a+") as f: f.write("# This line added by get_integrated_reports_dir.py\n") f.write("integrated_reports_dir = '"+integrated_reports_dir+"'\n") return integrated_reports_dir
def get_user_paths_filename()
-
Determines whether user_paths.py exists and returns the filename if it does.
Parameters
none
Returns
file_exists
:bool
- Whether user_paths.py exists
user_paths_py
:str
Absolute file path to user_paths.py
Expand source code
def get_user_paths_filename(): """ Determines whether user_paths.py exists and returns the filename if it does. Parameters ---------- none Returns ------- file_exists : bool Whether user_paths.py exists user_paths_py : str Absolute file path to user_paths.py """ pyuvs_path = os.path.dirname(os.path.realpath(__file__)) user_paths_py = os.path.join(pyuvs_path, "user_paths.py") file_exists = os.path.exists(user_paths_py) return file_exists, user_paths_py
def get_vm_file_list(server, serverdir, username, password, pattern='*.fits*', minorb=100, maxorb=100000, include_cruise=False, status_tag='')
-
Get a list of files from the VM that match a given pattern.
Parameters
server
:str
- name of the server to get files from (normally maven-iuvs-itf)
serverdir
:str
- directory to search for files matching the pattern
username
:str
- username for server access
password
:str
- password for server access
pattern
:str
- glob pattern used to search for matching files Defaults to '.fits' (matches all FITS files)
minorb
,maxorb
:int
- Minimum and maximum orbit numbers to sync from VM, in multiples of 100. Defaults to 100 and 100000, but smaller ranges than the available data will sync faster.
include_cruise
:bool
- Whether to sync cruise data in addition to the orbit range above. Defaults to False.
status_tag
:str
- Tag to decorate orbit number print string and inform user of progress. Defaults to "".
Returns
files
:np.array
- list of server filenames that match the pattern
Expand source code
def get_vm_file_list(server, serverdir, username, password, pattern="*.fits*", minorb=100, maxorb=100000, include_cruise=False, status_tag=""): """ Get a list of files from the VM that match a given pattern. Parameters ---------- server : str name of the server to get files from (normally maven-iuvs-itf) serverdir : str directory to search for files matching the pattern username : str username for server access password : str password for server access pattern : str glob pattern used to search for matching files Defaults to '*.fits*' (matches all FITS files) minorb, maxorb : int Minimum and maximum orbit numbers to sync from VM, in multiples of 100. Defaults to 100 and 100000, but smaller ranges than the available data will sync faster. include_cruise : bool Whether to sync cruise data in addition to the orbit range above. Defaults to False. status_tag : str Tag to decorate orbit number print string and inform user of progress. Defaults to "". Returns ------- files : np.array list of server filenames that match the pattern """ # connect to the server using paramiko ssh = paramiko.SSHClient() ssh.load_system_host_keys() ssh.connect(server, username=username, password=password) # get the list of folders on the VM stdin, stdout, stderr = ssh.exec_command('ls '+serverdir) server_orbit_folders = np.loadtxt(stdout, dtype=str) # determine what folders to look for files in sync_orbit_folders = ["orbit"+str(orbno).zfill(5) for orbno in np.arange(minorb, maxorb, 100)] if include_cruise: sync_orbit_folders = np.append(["cruise"], sync_orbit_folders) # sync only folders that belong to both groups sync_orbit_folders = server_orbit_folders[np.isin(server_orbit_folders, sync_orbit_folders, assume_unique=True)] # set up the output files array files = [] # iterate through the folder list and get the filenames that match # the input pattern for folder in sync_orbit_folders: clear_line() print(status_tag+folder, end="\r") cmd = "ls "+serverdir+folder+"/"+pattern stdin, stdout, stderr = ssh.exec_command(cmd) if len(stderr.read()) == 0: files.append(np.loadtxt(stdout, dtype=str)) else: continue ssh.close() if len(files) == 0: return [] else: return np.concatenate(np.array(files, dtype=object))
def setup_user_paths()
-
Generates user_paths.py, used by sync_data to read data from the IUVS VM and store it locally
Parameters
none
Returns
none
Notes
This is an interactive routine called once, generally the first time the user calls sync_data.
Expand source code
def setup_user_paths(): """ Generates user_paths.py, used by sync_data to read data from the IUVS VM and store it locally Parameters ---------- none Returns ------- none Notes ------- This is an interactive routine called once, generally the first time the user calls sync_data. """ # if user_paths.py already exists then assume that the user has # set everything up already file_exists, user_paths_py = get_user_paths_filename() if file_exists: return # get the location of the default L1B and SPICE directory print("Syncing all of the L1B data could take up to 2TB of disk space.") l1b_dir = input("Where would you like IUVS l1b FITS files" " to be stored by sync_data? ") print("Syncing all of the SPICE kernels could take up to 300GB of disk" " space.") spice_dir = input("Where would you like MAVEN/IUVS SPICE" " kernels to be stored by sync_data? ") # get the VM username to be used in rsync calls vm_username = input("What is your username for the" " IUVS VM to sync files? ") user_paths_file = open(user_paths_py, "x") user_paths_file.write("# This file automatically generated by" " maven_iuvs.download.setup_file_paths\n") user_paths_file.write("l1b_dir = \""+l1b_dir+"\"\n") user_paths_file.write("spice_dir = \""+spice_dir+"\"\n") user_paths_file.write("iuvs_vm_username = \""+vm_username+"\"\n") user_paths_file.close()
def sync_data(spice=True, l1b=True, pattern='*.fits*', minorb=100, maxorb=100000, include_cruise=False)
-
Synchronize new SPICE kernels and L1B data from the VM and remove any old files that have been replaced by newer versions.
Parameters
spice
:bool
- Whether or not to sync SPICE kernels. Defaults to True.
l1b
:bool
- Whether or not to sync level 1B data. Defaults to True.
pattern
:str
-
glob pattern used to search for matching files
Defaults to '.fits' (matches all FITS files)
minorb
,maxorb
:int
-
Minimum and maximum orbit numbers to sync from VM, in multiples of 100.
Defaults to 100 and 100000, but smaller ranges than the available data will sync faster.
include_cruise
:bool
-
Whether to sync cruise data in addition to the orbit range above.
Defaults to False.
Returns
None.
Expand source code
def sync_data(spice=True, l1b=True, pattern="*.fits*", minorb=100, maxorb=100000, include_cruise=False): """ Synchronize new SPICE kernels and L1B data from the VM and remove any old files that have been replaced by newer versions. Parameters ---------- spice : bool Whether or not to sync SPICE kernels. Defaults to True. l1b : bool Whether or not to sync level 1B data. Defaults to True. pattern : str glob pattern used to search for matching files Defaults to '*.fits*' (matches all FITS files) minorb, maxorb : int Minimum and maximum orbit numbers to sync from VM, in multiples of 100. Defaults to 100 and 100000, but smaller ranges than the available data will sync faster. include_cruise : bool Whether to sync cruise data in addition to the orbit range above. Defaults to False. Returns ------- None. """ # check if user path data exists and set it if not setup_user_paths() # load user path data from file from maven_iuvs.user_paths import l1b_dir, spice_dir, iuvs_vm_username if not os.path.exists(spice_dir): raise Exception("Cannot find specified SPICE directory." " Is it accessible?") if not os.path.exists(l1b_dir): raise Exception("Cannot find specified L1B directory." " Is it accessible?") # get starting time t0 = time.time() # define VM-related variables vm = 'maven-iuvs-itf' login = iuvs_vm_username + '@' + vm + ':' production_l1b = '/maven_iuvs/production/products/level1b/' stage_l1b = '/maven_iuvs/stage/products/level1b/' vm_spice = login + '/maven_iuvs/stage/anc/spice/' # try to sync the files, if it fails, user probably isn't on the VPN try: # get user password for the VM iuvs_vm_password = getpass('input password for '+login+' ') # sync SPICE kernels if spice is True: print('Updating SPICE kernels...') call_rsync(vm_spice, spice_dir, iuvs_vm_password, extra_flags="--delete") # sync level 1B data if l1b is True: # get the file names of all the relevant files print('Fetching names of level 1B production and stage' ' files from the VM...') prod_filenames = get_vm_file_list(vm, production_l1b, iuvs_vm_username, iuvs_vm_password, pattern=pattern, minorb=minorb, maxorb=maxorb, include_cruise=include_cruise, status_tag='production: ') stage_filenames = get_vm_file_list(vm, stage_l1b, iuvs_vm_username, iuvs_vm_password, pattern=pattern, minorb=minorb, maxorb=maxorb, include_cruise=include_cruise, status_tag='stage: ') local_filenames = glob.glob(l1b_dir+"/*/"+pattern) # get the list of most recent files, no matter where they are # order matters! putting local_filenames first ensures # duplicates aren't transferred if (len(prod_filenames) == 0 and len(stage_filenames) == 0): print("No matching files on VM") return files_to_sync = get_latest_files(np.concatenate([local_filenames, prod_filenames, stage_filenames])) # figure out which files to get from production and stage files_from_production = [a[len(production_l1b):] for a in files_to_sync if (a[:len(production_l1b)] == production_l1b)] files_from_stage = [a[len(stage_l1b):] for a in files_to_sync if a[:len(stage_l1b)] == stage_l1b] # production # save the files to rsync to temporary files # this way rsync can use the files_from flag transfer_from_production_file = tempfile.NamedTemporaryFile() np.savetxt(transfer_from_production_file.name, files_from_production, fmt="%s") print('Syncing ' + str(len(files_from_production)) + ' files from production...') call_rsync(login+production_l1b, l1b_dir, iuvs_vm_password, extra_flags=('--files-from=' + transfer_from_production_file.name)) # stage, identical to above transfer_from_stage_file = tempfile.NamedTemporaryFile() np.savetxt(transfer_from_stage_file.name, files_from_stage, fmt="%s") print('Syncing ' + str(len(files_from_stage)) + ' files from stage...') call_rsync(login+stage_l1b, l1b_dir, iuvs_vm_password, extra_flags=('--files-from=' + transfer_from_stage_file.name)) # now delete all of the old files superseded by newer versions clear_line() print('Cleaning up old files...') # figure out what files need to be deleted local_filenames = glob.glob(l1b_dir+"/*/*.fits*") latest_local_files = get_latest_files(local_filenames) local_files_to_delete = np.setdiff1d(local_filenames, latest_local_files) # ask if it's OK to delete the old files while True and len(local_files_to_delete) > 0: del_files = input('Delete ' + str(len(local_files_to_delete)) + ' old files? (y/n/p)') if del_files == 'n': # don't delete the files break if del_files == 'y': # delete the files [os.remove(f) for f in local_files_to_delete] break if del_files == 'p': print(local_files_to_delete) else: print("Please answer y or n, or p to print the file list.") # Question for merge manager: # Kyle's code keeps a list of these deleted files # in excluded_files.txt --- is this necessary? # index all local files to speed up later finding local_filenames = sorted(glob.glob(l1b_dir+"/*/*.fits*")) np.save(l1b_dir+'/filenames', sorted(local_filenames)) except OSError: raise Exception('rsync failed --- are you connected to the VPN?') # get ending time t1 = time.time() seconds = t1 - t0 m, s = divmod(seconds, 60) h, m = divmod(m, 60) # tell us how long it took print('Data syncing and cleanup took %.2d:%.2d:%.2d.' % (h, m, s))
def sync_euvm_l2b(sdc_username, sdc_password)
-
Sync EUVM L2B data file from MAVEN SDC. This deletes all old data in euvm_l2b_dir and replaces it with a newly downloaded file.
Parameters
sdc_username
:str
- Web login username for MAVEN SDC Team site.
sdc_password
:str
- Web login password for MAVEN SDC Team site.
Returns
none
Expand source code
def sync_euvm_l2b(sdc_username, sdc_password): """ Sync EUVM L2B data file from MAVEN SDC. This deletes all old data in euvm_l2b_dir and replaces it with a newly downloaded file. Parameters ---------- sdc_username : str Web login username for MAVEN SDC Team site. sdc_password : str Web login password for MAVEN SDC Team site. Returns ------- none """ print("syncing EUVM L2B...") url = 'https://lasp.colorado.edu/maven/data/sci/euv/l2b/' euvm_l2b_dir = get_euvm_l2b_dir() # go to the SDC webpage and expect to see a login form twill.browser.reset() twill.browser.go(url) # enter the login info twill.commands.fv("1", 'username', sdc_username) twill.commands.fv("1", 'password', sdc_password) twill.browser.submit() # load the page now that we're authenticated twill.browser.go(url) # find the most recent save file on the page files = sorted([f.url for f in twill.browser.links if '.sav' in f.url]) most_recent = files[-1] # navigate to that file twill.browser.go(url+most_recent) # delete old EUVM files in the EUVM l2b directory old_fnames = glob.glob(euvm_l2b_dir+'*l2b*.sav') [os.remove(f) for f in old_fnames] # save the new file to disk fname = euvm_l2b_dir + most_recent with open(fname, "wb") as file: file.write(twill.browser.dump)
def sync_integrated_reports(sdc_username, sdc_password, check_old=False)
-
Sync Integrated Reports data from MAVEN Ops page. Syncs all new files and all files from last 180 days by default.
Parameters
sdc_username
:str
- Web login username for MAVEN SDC Team site.
sdc_password
:str
- Web login password for MAVEN SDC Team site.
check_old
:bool
- Whether to check all files in the integrated_reports_dir against the server. Defaults to False.
Returns
none
Expand source code
def sync_integrated_reports(sdc_username, sdc_password, check_old=False): """Sync Integrated Reports data from MAVEN Ops page. Syncs all new files and all files from last 180 days by default. Parameters ---------- sdc_username : str Web login username for MAVEN SDC Team site. sdc_password : str Web login password for MAVEN SDC Team site. check_old : bool Whether to check all files in the integrated_reports_dir against the server. Defaults to False. Returns ------- none """ print("syncing Integrated Reports...") url = ('https://lasp.colorado.edu/ops/maven/team/' + 'inst_ops.php?content=msa_ir&show_all') local_ir_dir = get_integrated_reports_dir() # go to the SDC webpage and expect to see a login form twill.browser.reset() twill.browser.go(url) # enter the login info twill.commands.fv("1", 'username', sdc_username) twill.commands.fv("1", 'password', sdc_password) twill.browser.submit() # load the page now that we're authenticated twill.browser.go(url) # get the list of integrated report files on the server server_links = sorted([f for f in twill.browser.links if '.txt' in f.text]) # get the list of local integrated report files local_files = [os.path.basename(f) for f in glob.glob(os.path.join(local_ir_dir, '*'))] if check_old: # check all the files, not just the ones we don't have to_download = server_links else: # figure out which ones on the server are new old_time = datetime.datetime.now() - datetime.timedelta(days=180) old_time = old_time.strftime("%y%m%d") to_download = [f for f in server_links if ((int(f.text.split("_")[2]) > int(old_time)) or (f.text not in local_files))] # download the new files from lxml.etree import ParserError for link in to_download: clear_line() print(link.text, end="\r") # modify the page link to a download link download_link = link.url.replace("inst_ops.php?content=file&file=", "download-file.php?public/") # get the binary of the file try: twill.browser.go(download_link) server_binary_data = twill.browser.dump except ParserError: # sometimes the files have zero size, # which results in a ParserError server_binary_data = b"" # get the local filename fname = os.path.join(local_ir_dir, link.text) # look at the local file contents and compare with remote if os.path.exists(fname): with open(fname, "rb") as file: if file.read() == twill.browser.dump: # file is the same as the server, keep it continue # if we're here either the local file doesn't exist # or it's different from the server copy. # Either way, download the server version fname = os.path.join(local_ir_dir, link.text) with open(fname, "wb") as file: file.write(server_binary_data) clear_line()
def sync_sdc(check_old=False)
-
Wrapper routine to sync EUVM L2B data and Integrated Reports from MAVEN SDC.
Parameters
check_old
:bool
- Whether to check all files in the integrated_reports_dir against the server. Defaults to False.
Returns
none
Expand source code
def sync_sdc(check_old=False): """Wrapper routine to sync EUVM L2B data and Integrated Reports from MAVEN SDC. Parameters ---------- check_old : bool Whether to check all files in the integrated_reports_dir against the server. Defaults to False. Returns ------- none """ username = input('Username for MAVEN Team SDC: ') password = getpass('password for '+username+' on MAVEN Team SDC: ') sync_euvm_l2b(username, password) sync_integrated_reports(username, password, check_old=check_old)