function dataMat = readSeparatedDataColumns(location, desiredColumns)
%readSeparatedDataColumns - Read newer data provided by Lars.
%   Reads the newer data from Lars. It assumes the script
%   separateNewerDataColumns() has already been run, so that LOCATION
%   contains one text file per column, named <columnName>.txt, with one
%   header line followed by one numeric value per line.
%
%   If LOCATION contains a file self_match.txt, the data points for which
%   self_match == 1 are eliminated from every requested column. If that
%   file is absent, all data points are returned.
%
% USAGE: dataMat = readSeparatedDataColumns(location, desiredColumns)
%   location       - string with the directory where the results of
%                    separateNewerDataColumns() are stored.
%   desiredColumns - cell array of strings, each indicating the name
%                    of the column (feature) to be read. A single name
%                    may also be given as a plain string.
%   dataMat        - matrix, each column corresponding to one of the
%                    desiredColumns, in the same order. When
%                    desiredColumns is empty the result has zero columns.
%
%   An error with identifier 'readSeparatedDataColumns:sizeMismatch' is
%   raised when a column file does not have the same number of data rows
%   as self_match.txt (or, without self_match.txt, as the first column).
%
% EXAMPLE:
%   dataMat = readSeparatedDataColumns('data_dir', {'ratio', 'zscore'});
%   returns an N-by-2 matrix, with column 1 containing the ratio feature
%   and column 2 containing the zscore feature.

%Written by Luca Cazzanti
%Copyright 2005
%$Id$

%location = 'newer_csv_data';
%desiredColumns = {'ratio', 'zscore', ...
%    'experiment_contact_order', 'prediction_contact_order', ...
%    'experiment_percent_alpha', 'prediction_percent_alpha', ...
%    'experiment_percent_beta', 'prediction_percent_beta', ...
%    'correct_superfamily'};

% Allow a single column name to be passed as a plain string.
if ischar(desiredColumns)
    desiredColumns = {desiredColumns};
end
nColumns = length(desiredColumns);

% First find the self-matches: they must be taken out of each feature.
selfMatchFile = fullfile(location, 'self_match.txt');
hasSelfMatch = (exist(selfMatchFile, 'file') == 2);  % 2 == regular file

if hasSelfMatch
    selfMatch = textread(selfMatchFile, '%n', 'headerlines', 1);
    nRawRows = numel(selfMatch);
    keepRows = (selfMatch ~= 1);
    clear selfMatch;
    if ~all(keepRows)
        disp('Self-matches were found and removed');
    end
    dataMat = zeros(nnz(keepRows), nColumns);
else
    % Row count is unknown until the first column has been read; start
    % with an empty result so the output is defined even when no columns
    % are requested.
    nRawRows = 0;
    keepRows = true(0, 1);
    dataMat = zeros(0, nColumns);
end

% Read each requested feature and drop the self-matched rows.
for iCol = 1:nColumns
    fname = fullfile(location, [desiredColumns{iCol} '.txt']);
    column = textread(fname, '%n', 'headerlines', 1);

    if iCol == 1 && ~hasSelfMatch
        % Without self_match.txt the first column fixes the row count.
        nRawRows = numel(column);
        keepRows = true(nRawRows, 1);
        dataMat = zeros(nRawRows, nColumns);
    end

    if numel(column) ~= nRawRows
        error('readSeparatedDataColumns:sizeMismatch', ...
            'Column file %s has %d data rows; expected %d.', ...
            fname, numel(column), nRawRows);
    end

    dataMat(:, iCol) = column(keepRows);
end