function [sameSfPred, sameSfExp, diffSfPred, diffSfExp, featNames] = ...
    predExpFeatureMatrices(dataMat, columnNames)
%predExpFeatureMatrices - split predicted/experimental features by superfamily.
%
%   [sameSfPred, sameSfExp, diffSfPred, diffSfExp, featNames] = ...
%       predExpFeatureMatrices(dataMat, columnNames)
%
%   Inputs
%     dataMat     - numeric matrix, one row per data point, one column per
%                   entry of columnNames.
%     columnNames - names of the columns of dataMat (cell array of strings,
%                   or anything cellstr() accepts).
%
%   Outputs (columns of every matrix are ordered as in featNames)
%     sameSfPred  - prediction_* features of rows with correct_superfamily == 1
%     sameSfExp   - experiment_* features of rows with correct_superfamily == 1
%     diffSfPred  - prediction_* features of all remaining rows
%     diffSfExp   - experiment_* features of all remaining rows
%     featNames   - 1x5 cell: {'length','alpha%','beta%','zscore','contactOrder'}
%
%   Rows whose prediction_contact_order or experiment_contact_order is 0
%   are discarded first (as per email exchange with Lars, 5-11 and
%   5-12 2005).
%
%   Errors
%     - if the number of column names differs from the number of columns;
%     - if any required column name is missing or appears more than once
%       (previously this silently produced matrices with missing columns).
%
%   History: derived from formNewerFeatureMatrix, whose signature was
%     [sameSf, diffSf, featNames, sameSfOther, diffSfOther] = ...
%         formNewerFeatureMatrix(dataMat, columnNames)
%   The combined ratio/difference features and the "other" features
%   (prob2, prediction_sequence_length, convergence) of that version were
%   still being computed here but never returned; that dead code has been
%   removed.

%% Quick error check
nNames = length(columnNames);
nCols  = size(dataMat, 2);
if nNames ~= nCols
    error('Number of column names must be the same as the number of columns');
end

%% Resolve every required column up front so a bad header fails loudly
predLenCol   = localColumnIndex(columnNames, 'prediction_sequence_length');
expLenCol    = localColumnIndex(columnNames, 'experiment_sequence_length');
predAlphaCol = localColumnIndex(columnNames, 'prediction_percent_alpha');
expAlphaCol  = localColumnIndex(columnNames, 'experiment_percent_alpha');
predBetaCol  = localColumnIndex(columnNames, 'prediction_percent_beta');
expBetaCol   = localColumnIndex(columnNames, 'experiment_percent_beta');
zscoreCol    = localColumnIndex(columnNames, 'zscore');
predCoCol    = localColumnIndex(columnNames, 'prediction_contact_order');
expCoCol     = localColumnIndex(columnNames, 'experiment_contact_order');
sameSfCol    = localColumnIndex(columnNames, 'correct_superfamily');

%% Eliminate the data points for which either contact order is 0
% (~= 0 keeps NaN rows, exactly as the original "== 0" deletion did.)
keepRows = (dataMat(:, predCoCol) ~= 0) & (dataMat(:, expCoCol) ~= 0);
dataMat  = dataMat(keepRows, :);

%% Form feature matrices - predicted and experimental
% The Mammoth z-score is a single value per row, so the same column is
% copied into both the predicted and the experimental matrix.
featNames = {'length', 'alpha%', 'beta%', 'zscore', 'contactOrder'};
predMat = dataMat(:, [predLenCol, predAlphaCol, predBetaCol, zscoreCol, predCoCol]);
expMat  = dataMat(:, [expLenCol,  expAlphaCol,  expBetaCol,  zscoreCol, expCoCol]);

%% Separate same superfamily / different superfamily
% Anything that is not exactly 1 (including NaN) counts as "different".
isSameSf = (dataMat(:, sameSfCol) == 1);

sameSfPred = predMat(isSameSf, :);
sameSfExp  = expMat(isSameSf, :);
diffSfPred = predMat(~isSameSf, :);
diffSfExp  = expMat(~isSameSf, :);


function colIdx = localColumnIndex(columnNames, name)
%localColumnIndex - index of the single column called NAME, or an error.
colIdx = find(strcmp(name, cellstr(columnNames)));
if numel(colIdx) ~= 1
    error('predExpFeatureMatrices:badColumn', ...
          'Column ''%s'' must appear exactly once in columnNames (found %d).', ...
          name, numel(colIdx));
end