%separateNewerDataColumns
% Separates the file luca.newer.csv into individual columns in
% individual files. The files have the same name as the column
% whose data they store. This is necessary because luca.newer.csv
% is HUGE: 700K rows by 50 columns, but this script needs to be
% run only once.
%
% Input:  newer_csv_data/luca.newer.csv
% Output: newer_csv_data/<columnName>.txt, one file per entry of
%         columnNames below (any existing file is overwritten).
%
% The script stops with an error if the input file is missing or if
% any awk invocation returns a non-zero exit status, so a failure
% cannot silently leave empty or partial column files behind.
%
% NOTE: This script calls the shell and runs a short awk program.
% NOTE(review): awk is run with its default field separator, so fields
% are split on whitespace, not on commas -- confirm that this matches
% the actual layout of luca.newer.csv.

%Written by Luca Cazzanti
%Copyright 2005
%$Id$

location = 'newer_csv_data';
fileName = fullfile(location, 'luca.newer.csv');

% Column names in file order: entry k names the k-th awk field ($k).
columnNames = {'id', ...
               'cluster_center_index', ...
               'cluster_center_rank', ...
               'cluster_center_size', ...
               'convergence', ...
               'evalue', ...
               'experiment_astral_ac', ...
               'experiment_contact_order', ...
               'experiment_file', ...
               'experiment_index', ...
               'experiment_percent_alpha', ...
               'experiment_percent_beta', ...
               'experiment_sccs', ...
               'experiment_sequence_key', ...
               'experiment_sequence_length', ...
               'experiment_structure_key', ...
               'ln_e', ...
               'n_decoys_in_outfile', ...
               'nss', ...
               'nsup', ...
               'prediction_ac', ...
               'prediction_contact_order', ...
               'prediction_file', ...
               'prediction_index', ...
               'prediction_percent_alpha', ...
               'prediction_percent_beta', ...
               'prediction_sccs', ...
               'prediction_sequence_key', ...
               'prediction_sequence_length', ...
               'prediction_structure_key', ...
               'probability', ...
               'prob2', ...
               'psi1', ...
               'psi2', ...
               'score', ...
               'target', ...
               'zscore', ...
               'zscore_normalized', ...
               'zscore_rank', ...
               'correct_fold', ...
               'correct_superfamily', ...
               'self_match', ...
               'class', ...
               'cv', ...
               'nt', ...
               'ratio', ...
               'aratio', ...
               'bratio', ...
               'testprob', ...
               'testprob2'};
nColumns = length(columnNames);

% Fail early: without this check every awk call would fail and the shell
% redirection would still create one empty file per column.
if exist(fileName, 'file') ~= 2
    error('separateNewerDataColumns:missingInput', ...
          'Input file not found: %s', fileName);
end

for idx = 1:nColumns
    disp([columnNames{idx} '...']);
    columnFileName = fullfile(location, [columnNames{idx} '.txt']);

    % One awk pass per column: print field number idx of every row and
    % redirect it to the column's own file. The paths are double-quoted
    % so the shell treats each one as a single argument.
    awkCommand = ['awk ''{print $' num2str(idx) '}'' "' ...
                  fileName '" > "' columnFileName '"'];

    % system() returns the exit status, which eval(['!' ...]) discarded.
    [status, cmdOutput] = system(awkCommand);
    if status ~= 0
        error('separateNewerDataColumns:awkFailed', ...
              'awk failed for column %d (%s) with status %d: %s', ...
              idx, columnNames{idx}, status, cmdOutput);
    end
end