import os
import numpy as np
import pandas as pd
from ast import literal_eval
import re
import h0rton.tdlmc_data
from h0rton.tdlmc_utils import tdlmc_metrics
# Public names exported by a star-import of this module.
# NOTE(review): `format_results_for_tdlmc_metrics` is not defined in the part
# of the file visible here -- confirm it is defined further down.
__all__ = ['convert_to_dataframe', 'parse_closed_box', 'parse_open_box',
'read_from_csv', 'format_results_for_tdlmc_metrics']
# Absolute form of the first entry in the `h0rton.tdlmc_data` package's
# `__path__`; the functions below build every rung/seed path from it.
tdlmc_data_path = os.path.abspath(list(h0rton.tdlmc_data.__path__)[0])
"""str: directory path containing the TDLMC data
"""
def read_from_csv(csv_path):
    """Load the combined TDLMC csv file and restore its list-valued columns.

    List-like cells are stored in the csv as their string representation.
    Each such cell is evaluated back into a Python object with
    `ast.literal_eval` and then wrapped in a NumPy array.

    Parameters
    ----------
    csv_path : str
        path to the csv file generated using `convert_to_dataframe`

    Returns
    -------
    Pandas DataFrame
        the TDLMC data, with the list-valued columns holding NumPy arrays

    """
    # Columns whose cells are string-encoded sequences
    array_columns = (
        'host_pos',
        'measured_td',
        'measured_td_err',
        'agn_img_pos_x',
        'agn_img_pos_y',
        'agn_img_amp',
        'time_delays',
    )
    table = pd.read_csv(csv_path, index_col=False)
    for column in array_columns:
        evaluated = table[column].apply(literal_eval)
        table[column] = evaluated.apply(np.array)
    return table
def _rung_truth_table(rung):
    """Return the hand-tabulated `abcd_ordering_i` lists and H0 of a rung.

    Parameters
    ----------
    rung : int
        rung number; only 1 and 2 are tabulated

    Returns
    -------
    tuple
        (list of per-lens index lists, H0 value as a float)

    Raises
    ------
    NotImplementedError
        if `rung` is neither 1 nor 2

    """
    # The trailing comments are the numbers the original author attached to
    # each entry (presumably the seed numbers -- verify against the data).
    # The entries are applied positionally to the rows sorted by 'seed'.
    if rung == 1:
        abcd_ordering = [
            [0, 1, 2, 3],  # 101
            [0, 1, 2, 3],  # 102
            [0, 1, 2, 3],  # 103
            [0, 1],  # 104
            [0, 1],  # 105
            [0, 1, 2, 3],  # 107
            [1, 0, 3, 2],  # 108
            [1, 2, 0, 3],  # 109
            [1, 2, 3, 0],  # 110
            [3, 1, 0, 2],  # 111
            [2, 0, 1, 3],  # 113
            [1, 0],  # 114
            [1, 3, 2, 0],  # 115
            [1, 0],  # 116
            [3, 2, 0, 1],  # 117
            [3, 1, 0, 2],  # 118
        ]
        h0 = 74.151
    elif rung == 2:
        abcd_ordering = [
            [0, 1, 2, 3],  # 119
            [0, 1, 2, 3],  # 120
            [0, 1, 2, 3],  # 121
            [0, 1, 2, 3],  # 122
            [0, 1, 2, 3],  # 123
            [0, 2, 1, 3],  # 124
            [0, 1],  # 125
            [0, 1],  # 126
            [3, 0, 1, 2],  # 127
            [3, 2, 0, 1],  # 128
            [3, 0, 1, 2],  # 129
            [2, 1, 0, 3],  # 130
            [3, 0, 2, 1],  # 131
            [1, 3, 2, 0],  # 132
            [1, 0],  # 133
            [0, 1],  # 134
        ]
        h0 = 66.643
    else:
        raise NotImplementedError(
            "abcd_ordering_i and H0 are only tabulated for rungs 1 and 2")
    return abcd_ordering, h0


def convert_to_dataframe(rung, save_csv_path):
    """Store the TDLMC closed and open boxes into a Pandas DataFrame and
    export it to a csv file

    One row is built per seed directory found under `code1` and `code2` of
    the rung, by parsing its closed-box and open-box text files. Dictionary
    valued columns are expanded into one prefixed column per key, the rows
    are sorted by seed, and the hand-tabulated `abcd_ordering_i` and `H0`
    columns are attached before the table is written out.

    Parameters
    ----------
    rung : int
        rung number
    save_csv_path : str
        path of the csv file to be generated. If None, the file is saved as
        `rung{rung}_combined.csv` inside `tdlmc_data_path`.

    Returns
    -------
    Pandas DataFrame
        the extracted rung data

    Raises
    ------
    NotImplementedError
        if `rung` is neither 1 nor 2 (raised after the text files are parsed)

    """
    if save_csv_path is None:
        save_csv_path = os.path.join(tdlmc_data_path, 'rung{:d}_combined.csv'.format(rung))
        print("Saving rung {:d} data at {:s}...".format(rung, save_csv_path))
    # Rows are collected in a list and the frame is built once:
    # `DataFrame.append` was removed in pandas 2.0.
    rows = []
    for code in ['code1', 'code2']:
        closed_code_dir = os.path.join(tdlmc_data_path, 'rung{:d}'.format(rung), code)
        open_code_dir = os.path.join(tdlmc_data_path, 'rung{:d}_open_box'.format(rung), code)
        seeds = sorted(os.listdir(closed_code_dir))  # list of seeds, e.g. 'f160w-seed101'
        for seed in seeds:
            # Path to the text files
            closed_box_path = os.path.join(closed_code_dir, seed, 'lens_info_for_Good_team.txt')
            open_box_path = os.path.join(open_code_dir, seed, 'lens_all_info.txt')
            # A fresh dict per seed, so no value can leak between lenses
            row = {
                'name': 'rung{:d}_{:s}_{:s}'.format(rung, code, seed),
                'seed': seed,
                # Save seed path for easy access
                'seed_path': os.path.join(closed_code_dir, seed),
            }
            # Parse the text files
            row = parse_closed_box(closed_box_path, row)
            row = parse_open_box(open_box_path, row)
            rows.append(row)
    df = pd.DataFrame(rows)
    # Unravel nested dictionaries in some columns: (column, prefix of the new columns)
    nested_columns = [
        ('lens_mass', 'lens_mass_'),
        ('lens_light', 'lens_light_'),
        ('ext_shear_bphi', 'ext_shear_'),
        ('ext_shear_e1e2', 'ext_shear_'),
    ]
    expanded = [df[column].apply(pd.Series).add_prefix(prefix)
                for column, prefix in nested_columns]
    flat = df.drop(columns=[column for column, _ in nested_columns])
    df = pd.concat([flat] + expanded, axis=1)
    # Manually add abcd_ordering_i
    df = df.sort_values('seed', axis=0)
    abcd_ordering, h0 = _rung_truth_table(rung)
    # The orderings are ragged (2 or 4 entries), so they are stored as plain
    # lists in an object column; `np.array` on ragged input raises on
    # NumPy >= 1.24. Reusing `df.index` keeps the assignment positional with
    # respect to the seed-sorted rows.
    df['abcd_ordering_i'] = pd.Series(abcd_ordering, index=df.index, dtype=object)
    # Overwrites the per-lens 'H0' parsed from the open box with the rung value
    df['H0'] = h0
    df.to_csv(save_csv_path, index=False)
    return df
def parse_closed_box(closed_box_path, row_dict=None):
    """Parse the lines of a closed-box TDLMC text file for Rungs 0, 1, and 2

    The parser is positional: it reads fixed line numbers of the file and
    relies on their exact layout.

    Parameters
    ----------
    closed_box_path : str
        path to the closed box text file, `lens_info_for_Good_team.txt`
    row_dict : dict
        dictionary of the row info to update in place. Default: None, in
        which case a new empty dictionary is created for this call.

    Returns
    -------
    dict
        An updated dictionary containing the information in the closed box
        text file (the same object as `row_dict` when one was given)

    """
    # A `dict()` default would be created once and shared by every call
    # that omits `row_dict`, so a fresh one is made here instead.
    if row_dict is None:
        row_dict = {}
    # Context manager so the file handle is closed even if parsing fails
    with open(closed_box_path) as closed_box_file:
        lines = [line.rstrip('\n') for line in closed_box_file]
    # Line 2: a tab followed by a literal pair (lens redshift, source redshift)
    row_dict['z_lens'], row_dict['z_src'] = literal_eval(lines[2].split('\t')[1])
    # Line 5: '<value>km/s ... : <error>' after the tab
    vel_disp_fields = lines[5].split('\t')[1].split('km/s')
    row_dict['measured_vel_disp'] = float(vel_disp_fields[0])
    row_dict['measured_vel_disp_err'] = float(vel_disp_fields[1].split(':')[1])
    # Line 7: two parenthesized groups, the values and then their errors
    td_fields = re.split(r'\(|\)', lines[7])
    row_dict['measured_td'] = literal_eval(td_fields[1])
    row_dict['measured_td_err'] = literal_eval(td_fields[3])
    return row_dict
def parse_open_box(open_box_path, row_dict=None):
    """Parse the lines of an open-box TDLMC text file for Rungs 0, 1, and 2

    The parser is positional: it reads fixed line numbers of the file and
    relies on their exact layout.

    Parameters
    ----------
    open_box_path : str
        path to the open box text file, `lens_all_info.txt`
    row_dict : dict
        dictionary of the row info to update in place. Default: None, in
        which case a new empty dictionary is created for this call.

    Returns
    -------
    dict
        An updated dictionary containing the information in the open box
        text file (the same object as `row_dict` when one was given)

    """
    # A `dict()` default would be created once and shared by every call
    # that omits `row_dict`, so a fresh one is made here instead.
    if row_dict is None:
        row_dict = {}
    # Context manager so the file handle is closed even if parsing fails
    with open(open_box_path) as open_box_file:
        lines = [line.rstrip('\n') for line in open_box_file]
    row_dict['H0'] = float(re.split(r':\s|km/s/Mpc', lines[3])[-2])
    row_dict['td_distance'] = float(re.split('ls:|Mpc', lines[5])[-2])
    row_dict['time_delays'] = literal_eval(re.split(r'\(|\)', lines[7])[1])
    # Everything after the first 7 characters of line 11 is a Python literal
    row_dict['lens_mass'] = literal_eval(lines[11][7:])
    # The first parenthesized group of line 12 evaluates to a pair
    row_dict['ext_shear_e1e2'], row_dict['ext_shear_bphi'] = literal_eval(re.split(r'\(|\)', lines[12])[1])
    row_dict['lens_light'] = literal_eval(lines[14].split('\t')[1])
    host_fields = re.split(r'\(|\)|:|\t', lines[16])
    row_dict['host_name'] = host_fields[2][1:]  # drop the leading character
    row_dict['host_pos'] = literal_eval(host_fields[-2])
    host_tokens = re.split(r'\t|\s', lines[17])
    row_dict['host_mag'] = float(host_tokens[3])
    row_dict['host_r_eff'] = float(host_tokens[7])
    row_dict['agn_src_amp'] = float(lines[20].split()[-1])
    # Line 21 holds two parenthesized groups: x positions, then y positions
    agn_pos_fields = re.split(r'\(|\)', lines[21])
    row_dict['agn_img_pos_x'] = literal_eval(agn_pos_fields[1])
    row_dict['agn_img_pos_y'] = literal_eval(agn_pos_fields[3])
    row_dict['agn_img_amp'] = literal_eval(re.split(r'\(|\)', lines[22])[1])
    # NOTE(review): the pattern ends with an empty alternative ('mag|'), so
    # it can match the empty string. From Python 3.7 on, `re.split` also
    # splits on empty matches, which makes the pieces at indices 3 and 7
    # depend on the interpreter version. The pattern is kept unchanged here;
    # confirm the intended fields against a real `lens_all_info.txt`. Both
    # values are stored as strings, not floats.
    mag_fields = re.split('plane: |mag|', lines[23])
    row_dict['host_img_mag'] = mag_fields[3]
    row_dict['agn_img_mag'] = mag_fields[7]
    row_dict['vel_disp'] = float(re.split(r'km\/s| |\t', lines[25])[1])
    row_dict['kappa_ext'] = float(lines[27].split('\t')[1])
    return row_dict