/* This program dumps the column numbers (zero-based) and names from the
   Gaia satellite data, specifically the Data Release 2 (DR2) data.

   It requires downloading the Gaia data (which is huge if you download it
   all; the whole database is about 550 GB, and those are mostly gzipped
   files!) from:
      http://cdn.gea.esac.esa.int/Gaia/gdr2/

   This program just uses the limited data set in gaia_source_with_rv, which
   contains data for about 7.2 million stars.  The full data set contains
   about 1.2 billion stars.

   The data for gaia_source_with_rv can be downloaded directly from:
      http://cdn.gea.esac.esa.int/Gaia/gdr2/gaia_source_with_rv/csv/
   which is about 3.1 GB (gzipped!) of data; it's about 7.5 GB uncompressed.
   The new gunzip function in Frink will allow you to process
   gzip-compressed files in place.

   A description of the whole data release is available at:
      http://gea.esac.esa.int/archive/documentation/GDR2/

   More specifically, the description of the gaia_source table used in this
   program is available in section 14.1.1 of that document at:
      http://gea.esac.esa.int/archive/documentation/GDR2/Gaia_archive/chap_datamodel/sec_dm_main_tables/ssec_dm_gaia_source.html
*/

// Modify this path to where your Gaia files reside
dir = "/home/eliasen/builds/Gaia/gdr2/gaia_source_with_rv/csv"

// Candidate data files: every URL in the directory matching .csv.gz
gzFiles = select[fileURLs[dir], %r/\.csv\.gz/]

// Only the first matching file is examined (first[..., 1] yields at most one
// URL, so the loop body runs zero or one times).
for url = first[gzFiles, 1]
{
   // Pull just the first line of the decompressed file; it is handed to
   // makeColumnDictionaries as the comma-separated list of column names.
   header = array[first[lines[gunzip[url]], 1]]@0

   // Passing true makes the function print both tables.
   [colToName, nameToCol] = makeColumnDictionaries[header, true]
}

/** Builds two dictionaries from a comma-separated line of column names:
    one mapping column number to column name, and its inverse mapping column
    name to column number.  Column numbers are the indices of the fields
    obtained by splitting the line on commas.

    Arguments:
      line:  the comma-separated string of column names.
      print: if true, prints "number<TAB>name" for every column ordered by
             column number, then two blank lines, then the same
             "number<TAB>name" pairs ordered lexically by name.
             Defaults to false.

    Returns:
      [colToName, nameToCol]
*/
makeColumnDictionaries[line, print=false] :=
{
   names = split[%r/,/, line]

   // Forward mapping:  column index -> column name
   colToName = new dict
   for col = rangeOf[names]
   {
      colToName@col = names@col
   }

   // Reverse mapping:  column name -> column index
   nameToCol = colToName.invert[]

   if print
   {
      // Listing ordered by column number
      for col = sort[keys[colToName]]
      {
         name = colToName@col
         println["$col\t$name"]
      }

      println[]
      println[]

      // Listing ordered alphabetically by column name
      for colName = lexicalSort[keys[nameToCol]]
      {
         colNum = nameToCol@colName
         println["$colNum\t$colName"]
      }
   }

   return [colToName, nameToCol]
}