MatrixBerryCore
uniquerows.m
Go to the documentation of this file.
function [uniqueMat,varargout]=uniquerows(inpMat,isInteger,forceMode)
% UNIQUEROWS finds unique rows in input matrix, i.e. it is a more
% effective version of UNIQUE(...,'rows')
%
% Usage: [uniqueMat,indRight2LeftVec,indLeft2RightVec]=...
%            uniquerows(inpMat,isInteger,forceMode)
%
% Input:
%   regular:
%     inpMat: [nRows,nCols] - matrix whose unique rows are to be found;
%         numeric (real or complex), logical and char inputs are handled
%   optional:
%     isInteger: logical [1,1] - if true, the checks for numeric type,
%         complex values and NaNs are skipped and the values are treated
%         as integers (presumably a promise made by the caller);
%         false by default
%     forceMode: char [1,] - if equal (case-insensitively) to 'optimized',
%         the code-based version is used whenever the range of values
%         permits it; any other value forces the built-in UNIQUE; if
%         omitted, the version is chosen by an internal heuristic
%
% Output:
%   uniqueMat: [nUniqueRows,nCols] - unique rows of inpMat
%   indRight2LeftVec: double [nUniqueRows,1] - indices such that
%       uniqueMat=inpMat(indRight2LeftVec,:)
%   indLeft2RightVec: double [nRows,1] - for each row of inpMat, the
%       index of the matching row in uniqueMat
%
% Note: the built-in branches call UNIQUE with the 'legacy' flag while the
%   code-based branch does not, so which of several equal rows is
%   referenced by indRight2LeftVec may depend on the chosen branch.
%
import mxberry.core.throwerror;
persistent maxVal sqMaxVal logMaxVal;
%
nOuts=nargout;
isInd=nOuts>1;
if isInd
    varargout=cell(1,nOuts-1);
else
    varargout=cell(1,0);
end
if nargin<2
    isInteger=false;
end
%% trivial case: no rows or no columns
[nRows,nCols]=size(inpMat);
if nRows==0||nCols==0
    if nRows>0
        % all (empty) rows are equal, keep a single one
        uniqueMat=inpMat(1,:);
    else
        uniqueMat=inpMat;
    end
    if isInd
        [varargout{:}]=deal(ones(min(nRows,1),1));
        if nOuts>2
            % every input row maps onto the single unique row, so this
            % output must have one element per input row
            varargout{2}=ones(nRows,1);
        end
    end
    return;
end
isComplex=false;
if ~isInteger
    isNum=isnumeric(inpMat);
    if isNum
        isComplex=~isreal(inpMat);
        if isComplex
            % transform complex numbers to real ones by interleaving
            % real and imaginary parts in adjacent columns
            nCols=2*nCols;
            indMat=feval(class(inpMat),zeros(nRows,nCols));
            indMat(:,1:2:nCols)=real(inpMat);
            indMat(:,2:2:nCols)=imag(inpMat);
            inpMat=indMat;
        end
    end
end
if nCols<2
    % single column: built-in unique is sufficient
    if ~isInteger
        % find nans
        isMat=isnan(inpMat);
        isInteger=~any(isMat);
    end
    if isInteger
        % perform unique
        [uniqueMat,varargout{:}]=unique(inpMat,'legacy');
    else
        if isInd
            % all NaNs are collapsed into one value represented by the
            % last NaN in inpMat
            curInd=find(isMat,1,'last');
        end
        isMat=~isMat;
        [uniqueMat,varargout{:}]=unique(inpMat(isMat),'legacy');
        % add info for NaNs
        uniqueMat=[uniqueMat;NaN];
        if isInd
            % translate indices within the non-NaN subset back to indices
            % within inpMat
            indRight2LeftVec=find(isMat);
            varargout{1}=[indRight2LeftVec(varargout{1});curInd];
            if nOuts>2
                % NaN rows point to the last element of uniqueMat
                indLeft2RightVec=repmat(numel(uniqueMat),nRows,1);
                indLeft2RightVec(isMat)=varargout{2};
                varargout{2}=indLeft2RightVec;
            end
        end
    end
else
    % initial actions
    isForceMode=nargin>=3;
    if isempty(maxVal)||isempty(sqMaxVal)||isempty(logMaxVal)
        % largest magnitude up to which consecutive integers are
        % represented exactly in double, plus derived thresholds
        maxVal=1/eps('double');
        sqMaxVal=sqrt(maxVal);
        logMaxVal=log2(maxVal);
    end
    isnOptimized=true;
    isIntType=isinteger(inpMat);
    isLogicalType=islogical(inpMat);
    isCharType=ischar(inpMat);
    isInteger=isInteger||isIntType||isLogicalType||isCharType;
    isReshape=~isInteger;
    indMat=inpMat;
    isAllFinite=false;
    if isReshape
        if ~isNum
            throwerror('wrongInput','type of inpMat is incorrect');
        end
        % reshape matrix into column vector
        indMat=indMat(:);
        isMat=isfinite(indMat);
        isAllFinite=all(isMat);
        if isAllFinite
            minInpVal=min(indMat);
            maxInpVal=max(indMat);
        else
            % replace non-finite numbers by finite ones
            uniqueMat=indMat(isMat);
            % determine range of finite values
            if isempty(uniqueMat)
                minInpVal=0;
                maxInpVal=0;
            else
                minInpVal=min(uniqueMat);
                maxInpVal=max(uniqueMat);
            end
            isMat=~isMat;
            % replace -Inf by a value below all finite ones
            isCurMat=isMat;
            isCurMat(isMat)=indMat(isMat)==-Inf;
            if any(isCurMat)
                curVal=minInpVal;
                nextVal=curVal-1;
                if nextVal==curVal
                    % curVal-1 is not representable; curVal-abs(curVal)
                    % is strictly smaller for either sign of curVal
                    % (2*curVal would be LARGER for positive curVal)
                    nextVal=curVal-abs(curVal);
                end
                minInpVal=nextVal;
                indMat(isCurMat)=nextVal;
            end
            % replace Inf by a value above all finite ones
            isCurMat(isMat)=indMat(isMat)==Inf;
            if any(isCurMat)
                curVal=maxInpVal;
                nextVal=curVal+1;
                if nextVal==curVal
                    % curVal+abs(curVal) is strictly larger for either
                    % sign of curVal (2*curVal would be SMALLER for
                    % negative curVal)
                    nextVal=curVal+abs(curVal);
                end
                maxInpVal=nextVal;
                indMat(isCurMat)=nextVal;
            end
            % replace NaN by a value above everything else
            isCurMat(isMat)=isnan(indMat(isMat));
            if any(isCurMat)
                curVal=maxInpVal;
                if curVal==Inf
                    throwerror('wrongInput',...
                        ['Range of values in inpMat is too large to ',...
                        'process it correctly']);
                end
                nextVal=curVal+1;
                if nextVal==curVal
                    nextVal=curVal+abs(curVal);
                end
                maxInpVal=nextVal;
                indMat(isCurMat)=nextVal;
            end
        end
        rangeVal=maxInpVal-minInpVal+1;
        if rangeVal<=sqMaxVal
            % optimized version is applicable only to integer values
            isInteger=all(fix(indMat)==indMat);
            isnOptimized=~isInteger;
        end
    end
    if isInteger&&isnOptimized
        % calculate range of values
        if isLogicalType
            minInpVal=0;
            maxInpVal=1;
        elseif isCharType
            minInpVal=0;
            maxInpVal=double(intmax('uint16'));
        else
            if ~isReshape
                isReshape=true;
                indMat=indMat(:);
            end
            minInpVal=double(min(indMat));
            maxInpVal=double(max(indMat));
        end
        % determine whether optimized version may be performed or not
        rangeVal=maxInpVal-minInpVal+1;
        isnOptimized=rangeVal>sqMaxVal;
    end
    if isForceMode
        isnOptimized=isnOptimized||~strcmpi(forceMode,'optimized');
    elseif ~isnOptimized
        % determine what version (standard or optimized) is to be used
        if rangeVal<=pow2(logMaxVal/nCols)
            isOptimized=nRows>=500;
        else
            isOptimized=false;
        end
        isnOptimized=~isOptimized;
    end
    % reshape indMat from column vector into matrix if necessary
    % (it is not used at all when built-in unique is applied to inpMat)
    if isReshape&&~(isnOptimized&&isAllFinite)
        indMat=reshape(indMat,nRows,nCols);
    end
    if isnOptimized
        % perform built-in version of unique
        if isAllFinite
            [uniqueMat,varargout{:}]=unique(inpMat,'rows','legacy');
        else
            % unique is applied to the matrix with substituted non-finite
            % values, rows themselves are taken from the original matrix
            [~,indRight2LeftVec,varargout{2:end}]=unique(indMat,'rows',...
                'legacy');
            uniqueMat=inpMat(indRight2LeftVec,:);
            if isInd
                varargout{1}=indRight2LeftVec;
            end
        end
        if isComplex
            uniqueMat=complex(uniqueMat(:,1:2:nCols),uniqueMat(:,2:2:nCols));
        end
        return;
    end
    % calculate codes for rows
    nAllCols=nCols;
    if ~isa(indMat,'double')
        indMat=double(indMat);
    end
    % shift values so that the minimum becomes 1; subtracting first keeps
    % the result exact even when abs(minInpVal) is too large for
    % 1-minInpVal to be representable
    indMat=(indMat-minInpVal)+1;
    indMat=indMat(:,nCols:-1:1); % flip columns to obtain desired sorting
    allSizeVec=max(indMat,[],1);
    % repeatedly merge groups of columns into single columns of codes
    % until only one column remains
    while nCols>1
        iCol=0;
        lenVec=[];
        % break all columns on segments such that the product of sizes
        % within a segment does not exceed maxVal (at least 2 columns per
        % segment)
        while iCol<nCols
            curInd=max(find(cumprod(allSizeVec(iCol+1:end))<=...
                maxVal,1,'last'),2);
            if isempty(curInd)
                curInd=2;
            end
            lenVec=horzcat(lenVec,curInd); %#ok<AGROW>
            iCol=iCol+curInd;
        end
        % perform num2cell(indMat,1)
        auxCell=cell(1,nCols);
        for iCol=1:nCols
            auxCell{iCol}=indMat(:,iCol);
        end
        nCurCols=nCols;
        nCols=numel(lenVec);
        if nCols==1
            % get column vector with codes
            indMat=sub2ind(allSizeVec,auxCell{:});
        else
            indMat=indMat(:,1:nCols);
            sizeVec=nan(1,nCols);
            % adjust lenVec so that segments cover existing columns only
            lenVec(end)=lenVec(end)+nCurCols-sum(lenVec);
            % if necessary, process last segment with single column
            if lenVec(end)==1
                indMat(:,nCols)=auxCell{nCurCols};
                sizeVec(nCols)=allSizeVec(nCurCols);
                nCurCols=nCols-1;
            else
                nCurCols=nCols;
            end
            % get codes for all segments
            leftIndVec=[1 cumsum(lenVec(1:nCurCols-1))+1];
            for iCol=1:nCurCols
                curInd=leftIndVec(iCol)+(0:lenVec(iCol)-1);
                % renumber codes by consecutive integers to keep the
                % range for the next pass as small as possible
                [uniqueLinInd,~,indMat(:,iCol)]=unique(...
                    sub2ind(allSizeVec(curInd),auxCell{curInd}));
                sizeVec(iCol)=length(uniqueLinInd);
            end
            allSizeVec=sizeVec;
        end
    end
    % perform built-in unique for codes
    if nCols==0
        indRight2LeftVec=1;
        if nOuts>2
            varargout{2}=ones(nRows,1);
        end
    else
        [~,indRight2LeftVec,varargout{2:end}]=unique(indMat);
    end
    uniqueMat=inpMat(indRight2LeftVec,:);
    if isComplex
        uniqueMat=complex(uniqueMat(:,1:2:nAllCols),uniqueMat(:,2:2:nAllCols));
    end
    if isInd
        varargout{1}=indRight2LeftVec;
    end
end
function num2cell(in inpArray, in varargin)
NUM2CELL is an extension of Matlab built-in function "num2cell" designed to work correctly with empty...
function throwerror(in msgTag, in varargin)
THROWERROR works similarly to built-in ERROR function in case when there is no output arguments but s...
function uniquerows(in inpMat, in isInteger, in forceMode)
UNIQUEROWS finds unique rows in input matrix, i.e. the more effective version of UNIQUE(...,'rows')
function repmat(in inpArray, in varargin)
function unique(in inpVec)
UNIQUE for arrays of any type.