function [estClass] = calcCentroidalParametricClassX(testMat, trainMat, classPriors)
% calcCentroidalParametricClassX  Two-class centroid-based parametric classifier.
%
%   estClass = calcCentroidalParametricClassX(testMat, trainMat, classPriors)
%
% Assumes the similarity measure is the counting (Hamming) similarity,
% i.e. the number of common features, as computed by getSimCounting.
%
% For each class a representative training row (the "centroid") is chosen
% with getMostRepElement. The similarity of a point to its own-class
% centroid, and to the other class's centroid (the "cross" similarity), is
% modelled by an exponential distribution (the max-ent / max-likelihood
% distribution) over the domain 0..numFeatures. Test points are assigned
% to the class with the largest log posterior (0-1 costs).
%
% Inputs:
%   testMat     - test feature matrix, one test feature vector per row.
%                 NOTE(review): rows are passed to getSimCounting as-is, so
%                 they presumably hold features only (no label column) -
%                 confirm against callers.
%   trainMat    - training matrix, one training vector per row; the LAST
%                 column holds the class label (1 or 2), the remaining
%                 columns are the features.
%   classPriors - vector of the two class prior probabilities (row or
%                 column orientation is accepted).
%
% Output:
%   estClass    - 1-by-numTest row vector of estimated class labels, one
%                 per row of testMat (1-by-0 when testMat has no rows).

numTrain = size(trainMat, 1);
numTest = size(testMat, 1);
numFeatures = size(trainMat, 2) - 1;
trainLabel = trainMat(:, numFeatures + 1);
numClasses = numel(classPriors);

% The model below is hard-wired to exactly two classes (own/cross centroid
% swap). Fail early with a clear message instead of an obscure indexing
% error further down.
if numClasses ~= 2
    error('calcCentroidalParametricClassX:twoClassOnly', ...
        'classPriors must contain exactly 2 entries (got %d).', numClasses);
end

% lambda, normC and simTest are row vectors; force the priors to a row so
% the element-wise sum of the log-posterior terms stays 1-by-numClasses.
classPriors = classPriors(:).';

%%%%%%% GET PARAMETRIC MODEL FOR EACH CLASS %%%%%%%%%%%
% First we need a representative element for each class. The value
% returned by getMostRepElement is used as a row index into the
% per-class training matrix.
trainMat1 = trainMat(trainLabel == 1, 1:numFeatures);
trainMat2 = trainMat(trainLabel == 2, 1:numFeatures);
u1 = getMostRepElement(trainMat1);
u2 = getMostRepElement(trainMat2);
muVect = zeros(numClasses, numFeatures);
muVect(1, :) = trainMat1(u1, :);
muVect(2, :) = trainMat2(u2, :);

% Descriptive statistic per class: mean similarity between the points of
% the class and their own centroid (classMean), and between the points of
% the class and the OTHER class's centroid (classMeanX).
classMean = zeros(numClasses, 1);
classMeanX = zeros(numClasses, 1);
countClass = zeros(numClasses, 1);

% Reverse the class labels: 1 -> 2 and 2 -> 1.
trainLabelX = trainLabel - 1;
trainLabelX(trainLabelX == 0) = 2;

for j = 1:numTrain
    own = trainLabel(j);
    other = trainLabelX(j);
    feat = trainMat(j, 1:numFeatures);

    % similarity between the jth training object and its class centroid
    classMean(own) = classMean(own) + getSimCounting(feat, muVect(own, :));
    countClass(own) = countClass(own) + 1;

    % cross similarity: note the centroid of the other class is used
    classMeanX(own) = classMeanX(own) + getSimCounting(feat, muVect(other, :));
end

% Final class means. Both sums were accumulated per own-class label, so
% they share the same per-class count.
classMean = classMean ./ countClass;
classMeanX = classMeanX ./ countClass;

% Since this is a counting measure, the domain of possible similarities
% is 0, 1, 2, ..., numFeatures.
domainVector = 0:numFeatures;

% Numerically optimize the lambda parameter of the exponential model and
% compute the matching normalization constants.
lambda = zeros(1, numClasses);
lambdaX = zeros(1, numClasses);
normC = zeros(1, numClasses);
normCX = zeros(1, numClasses);
for g = 1:numClasses
    lambda(g) = findLambda(classMean(g), domainVector);
    lambdaX(g) = findLambda(classMeanX(g), domainVector);
    normC(g) = 1 / sum(exp(lambda(g) * domainVector));
    normCX(g) = 1 / sum(exp(lambdaX(g) * domainVector));
end

% Sanity check that the model is normalized (each should sum to 1):
%psim1 = normC(1)*exp(lambda(1)*domainVector)
%psim2 = normC(2)*exp(lambda(2)*domainVector)

%%%%%% ITERATE THROUGH TEST POINTS %%%%%%%%%%%%
% Preallocate so the output is defined even when testMat has no rows.
estClass = zeros(1, numTest);
simTest = zeros(1, numClasses);

for i = 1:numTest
    x = testMat(i, :); % pull off a test point

    % simTest(g) = s(x, centroid of class g)
    for g = 1:numClasses
        simTest(g) = getSimCounting(x, muVect(g, :));
    end

    % Log posterior for each class: own-centroid term plus prior ...
    logPosteriors = lambda .* simTest + log(normC) + log(classPriors);

    % ... plus the cross term, which scores the similarity to the OTHER
    % class's centroid (hence the swapped simTest order).
    crossTerm = log(normCX) + lambdaX .* simTest(2:-1:1);
    logPosteriors = logPosteriors + crossTerm;

    % Classify as whoever has the largest log posterior (0-1 costs).
    [bestVal, estClass(i)] = max(logPosteriors); %#ok<ASGLU>
end % iteration over test points