function [sameSf, diffSf, featNames, sameSfOther, diffSfOther, otherFeatNames] = ...
    formNewerFeatureMatrix(dataMat, columnNames)
%formNewerFeatureMatrix - form feature matrices from "newer" data.
%
%   [sameSf, diffSf, featNames, sameSfOther, diffSfOther, otherFeatNames] = ...
%       formNewerFeatureMatrix(dataMat, columnNames)
%
% Inputs:
%   dataMat     - numeric matrix, one row per data point and one column per
%                 entry of columnNames.
%   columnNames - cell array of strings (a char matrix is also accepted)
%                 naming the columns of dataMat. The following names must
%                 each appear exactly once:
%                   prediction_contact_order,   experiment_contact_order,
%                   prediction_sequence_length, experiment_sequence_length,
%                   prediction_percent_alpha,   experiment_percent_alpha,
%                   prediction_percent_beta,    experiment_percent_beta,
%                   zscore, prob2, convergence, correct_superfamily
%
% Outputs:
%   sameSf         - classifier features of the rows whose
%                    correct_superfamily value equals 1.
%   diffSf         - classifier features of all remaining rows.
%   featNames      - 1x5 cell array naming the columns of sameSf/diffSf.
%   sameSfOther    - additional (non-classifier) features, same rows as sameSf.
%   diffSfOther    - additional (non-classifier) features, same rows as diffSf.
%   otherFeatNames - (optional) 1x3 cell array naming the columns of
%                    sameSfOther/diffSfOther.
%
% Rows whose prediction or experiment contact order is 0 are discarded
% before any feature is computed.
%
% Errors:
%   - the number of column names differs from the number of columns;
%   - a required column name is missing or appears more than once.

%Written by Luca Cazzanti
%Copyright 2005
%$Id$

%% Quick error check
if ischar(columnNames)
    % Accept a char matrix of names (one per row), as strmatch used to.
    columnNames = cellstr(columnNames);
end
nNames = numel(columnNames);
nCols = size(dataMat, 2);
if nNames ~= nCols
    error('Number of column names must be the same as the number of columns');
end

%% First eliminate the data points for which the contact_order is 0
%% as per email exchange with Lars, 5-11 and 5-12 2005.
predCoIdx = localFindColumn(columnNames, 'prediction_contact_order');
dataMat(dataMat(:, predCoIdx) == 0, :) = [];

expCoIdx = localFindColumn(columnNames, 'experiment_contact_order');
dataMat(dataMat(:, expCoIdx) == 0, :) = [];

%% Form feature matrix

% ... ratio feature: prediction length over experiment length ...
predLenIdx = localFindColumn(columnNames, 'prediction_sequence_length');
expLenIdx  = localFindColumn(columnNames, 'experiment_sequence_length');
ratio = dataMat(:, predLenIdx) ./ dataMat(:, expLenIdx);

% ... difference in percent alpha (experiment minus prediction) ...
predIdx = localFindColumn(columnNames, 'prediction_percent_alpha');
expIdx  = localFindColumn(columnNames, 'experiment_percent_alpha');
diffAlpha = dataMat(:, expIdx) - dataMat(:, predIdx);

% ... difference in percent beta (experiment minus prediction) ...
predIdx = localFindColumn(columnNames, 'prediction_percent_beta');
expIdx  = localFindColumn(columnNames, 'experiment_percent_beta');
diffBeta = dataMat(:, expIdx) - dataMat(:, predIdx);

% ... Mammoth zscore feature ...
zscoreIdx = localFindColumn(columnNames, 'zscore');

% ... contact order (THIS MAY CHANGE) ...
% The experiment contact order was used here originally; it was replaced
% by the prediction contact order.
featMat = [ratio, diffAlpha, diffBeta, ...
           dataMat(:, zscoreIdx), dataMat(:, predCoIdx)];
featNames = {'ratio', ...
             'difference_percent_alpha', ...
             'difference_percent_beta', ...
             'zscore', ...
             'prediction_contact_order'};

%% Additional features not used in classifiers
% Names and columns are derived from the same list so they cannot get out
% of step (the previous code stored 'convergence' in slot 1, overwriting
% 'prob2').
otherFeatNames = {'prob2', 'prediction_sequence_length', 'convergence'};
otherIdx = zeros(1, numel(otherFeatNames));
for k = 1:numel(otherFeatNames)
    otherIdx(k) = localFindColumn(columnNames, otherFeatNames{k});
end
otherFeatMat = dataMat(:, otherIdx);

%% Separate same superfamily/different superfamily
sfIdx = localFindColumn(columnNames, 'correct_superfamily');
isSameSf = (dataMat(:, sfIdx) == 1);

sameSf = featMat(isSameSf, :);
diffSf = featMat(~isSameSf, :);

sameSfOther = otherFeatMat(isSameSf, :);
diffSfOther = otherFeatMat(~isSameSf, :);


function colIdx = localFindColumn(columnNames, name)
%localFindColumn - index of the single column whose name equals NAME exactly.
% Raises an error when the name is absent or ambiguous, so that a malformed
% header cannot silently drop or duplicate a feature column.
colIdx = find(strcmp(name, columnNames));
if isempty(colIdx)
    error('formNewerFeatureMatrix:missingColumn', ...
          'Required column ''%s'' was not found in columnNames.', name);
elseif numel(colIdx) > 1
    error('formNewerFeatureMatrix:duplicateColumn', ...
          'Column name ''%s'' appears %d times in columnNames.', ...
          name, numel(colIdx));
end