MatrixBerryCore
xmlparse.m
Go to the documentation of this file.
function [SData, SMetaData] = xmlparse(inpXmlString, attSwitch, SData,level)
% XMLPARSE parses an XML string and converts it into a MATLAB variable
% (non-validating: only tag nesting is checked, not a schema).
%
% Input:
%   regular:
%       inpXmlString: char[1,] - XML text to be parsed
%   optional:
%       attSwitch: char[1,] - 'on' (default) or 'off'; when 'on' the
%           attributes idx, size and type of each element steer the
%           conversion, any other attribute found at level 0 is stored
%           in SMetaData
%       SData: any - variable the parsed elements are merged into,
%           struct([]) by default
%       level: double[1,1] - nesting depth, 0 (default) for the outermost
%           call; recursive calls made for nested elements pass level+1
%
% Output:
%   SData: any - result of the conversion; SData is returned unchanged
%       when inpXmlString is empty
%   SMetaData: struct[1,1] - attributes other than idx/size/type collected
%       at level 0 (including those of a leading <?xml ...?> entry)
%
% Errors (raised via throwerror):
%   wrongInput:badFile - number of opening and closing tags differs, or a
%       comment is not terminated
%   wrongInput - empty element name/type, or two non-'item' root entries
%
import mxberry.xml.*;
import mxberry.core.throwerror;
import mxberry.core.check.checkgen;
%
if nargin<4
    level=0;
    if nargin<3
        SData = struct([]);
        if nargin<2
            attSwitch='on';
        else
            checkgen(attSwitch,@(x)ismember(x,{'on','off'}));
        end
    end
end
%
% assign the second output up-front so that a two-output call does not
% fail on the early return below
SMetaData=struct();
%
if isempty(inpXmlString)
    return
end
%---------------------------
% remove all comment entries from the string by blanking them out;
% getFindStr expects (longStr, shortStr) - with the arguments swapped it
% returns [] for any input longer than the marker and nothing is blanked
execpos = getFindStr(inpXmlString,'<!--');
if ~isempty(execpos)
    allclose = getFindStr(inpXmlString,'-->');
    for x=1:length(execpos)
        xstart = execpos(x);
        idxclose = find(allclose > xstart);
        if isempty(idxclose)
            throwerror('wrongInput:badFile',['XML parse error: ',...
                'comment opened with ''<!--'' is not closed.']);
        end
        xend = allclose(idxclose(1));
        % overwrite '<!--' ... '-->' (the closing marker is 3 chars long)
        inpXmlString(xstart:(xend+2)) = blanks(xend-xstart+3);
    end
end
%
% positions of '<' that start an element (i.e. are not part of '</')
indParOpenedVec = find(inpXmlString=='<'&[inpXmlString(2:end)~='/',true]);
%
% positions of all markers that terminate an element
indParClosedVec = sort( [getFindStr(inpXmlString, '</'), ...
    getFindStr(inpXmlString, '/>'), ...
    getFindStr(inpXmlString, '-->'), ...
    getFindStr(inpXmlString, '?>')] );

%
if numel(indParOpenedVec) ~= numel(indParClosedVec)
    throwerror('wrongInput:badFile',['XML parse error: Number of ',...
        'element start and end tags does not match.']);
end
%
% build a position-sorted table [position, -1 for opening | +1 for closing]
nParOpened = length(indParOpenedVec);
parOpenCloseCountVec=[-ones(nParOpened,1);ones(nParOpened,1)];
[indParOpenedSortedVec,indParForwardSortedVec]=sort([indParOpenedVec,indParClosedVec]);
parIndexMat=[indParOpenedSortedVec.',parOpenCloseCountVec(indParForwardSortedVec)];
%
iElem=1;
nParentheses = 0;
while iElem<=size(parIndexMat,1)
    % walk forward until the running open/close balance returns to zero:
    % that row is the end marker matching the current opening tag
    indEntryStart = parIndexMat(iElem,1);
    nParentheses = nParentheses + parIndexMat(iElem,2);
    while nParentheses ~= 0
        iElem = iElem+1;
        nParentheses = nParentheses + parIndexMat(iElem,2);
    end
    indEntryEnd = parIndexMat(iElem,1);
    % text strictly between the opening '<' and the matching end marker
    tmp = inpXmlString(indEntryStart+1:indEntryEnd-1);
    %
    typeStr = '';
    indexStr=[];
    %
    headsep = getFindStr(tmp,'>');
    if isempty(headsep)
        % deal with "/>" empty elements by using the whole tmp string
        headsep = length(tmp);
    else
        % only the first '>' closes the opening tag; make this explicit
        % instead of passing a vector as a colon operand
        headsep = headsep(1);
    end

    namesep = min([getFindStr(tmp,' '), getFindStr(tmp,'>')]);
    if isempty(namesep)
        % neither a blank nor '>' present: the whole text is the tag name
        tagStr = tmp;
        header = '';
    else
        tagStr = tmp(1:namesep-1);
        header = tmp(namesep+1:headsep);
    end

    content = tmp(headsep+1:end);

    % make sure that we have size [0 0] and not [1 0]
    if isempty(content)
        content = '';
    end

    % parse header for attributes
    att_lst = header;
    %
    tokens=regexp([' ' att_lst],'\s([^=]*)="([^"]*)"','tokens');
    %
    isSizeSpecified=false;
    if strcmp(attSwitch, 'on')
        for k=1:1:length(tokens)
            switch(tokens{k}{1})
                case 'idx'
                    indexStr = str2double(tokens{k}{2});
                case 'size'
                    % NOTE(review): str2num evaluates its argument, so the
                    % attribute text must come from a trusted source
                    sizeVec = str2num(tokens{k}{2}); %#ok<ST2NM>
                    isSizeSpecified=true;
                case 'type'
                    typeStr = tokens{k}{2};
                otherwise
                    if level==0
                        SMetaData.(tokens{k}{1})=tokens{k}{2};
                    end
            end
        end
    end
    if ~isSizeSpecified
        if strcmpi(typeStr,'struct')
            sizeVec=[1 1];
        else
            sizeVec=[0 0];
        end
    end
    nElems=prod(sizeVec);
    %
    isnEmpty=~all(sizeVec==0);
    % special names: ignore entity declarations and processing instructions
    % Note: we also ignore the <?xml ...> entry with version number.
    % An empty tag name is not indexed here so that it reaches the
    % 'NAME or TYPE is empty!' check below.
    if ~isempty(tagStr) && any(tagStr(1)=='?!')
        iElem=iElem+1;
        continue;
    end
    if isempty(typeStr)
        typeStr = 'char';
    end
    % remove namespace from NAME
    indEntry = getFindStr(tagStr,':');
    if ~isempty(indEntry)
        tagStr = tagStr(indEntry+1:end);
    end

    % remove namespace from TYPE
    indEntry = find(typeStr==':');
    if ~isempty(indEntry)
        typeStr = typeStr(indEntry+1:end);
    end

    % make sure TYPE is valid
    if isempty(tagStr) || isempty(typeStr)
        throwerror('wrongInput','NAME or TYPE is empty!')
    end

    % check if type is correct: character content that still contains
    % tags is treated as a container of nested elements
    if strcmp(typeStr, 'char') && any(content=='<')
        if strcmp(attSwitch, 'on')
            typeStr = 'struct';
        else
            typeStr = 'parent';
        end
    end

    % check if index is correct
    if indexStr==0
        indexStr = [];
    end

    if ~isempty(SData) && isfield(SData, tagStr) && isempty(indexStr)
        cont_list = {SData.(tagStr)};
        found = 0;
        % this loop makes sure that the current entry is inserted
        % after the last non-empty entry in the content vector cont_list
        for cc=length(cont_list):-1:1
            if ~isempty(cont_list{cc})
                found=1;
                break
            end
        end
        if ~found
            indexStr = max(cc-1,1);
        else
            indexStr = cc+1;
        end
    end

    if isempty(indexStr) && ~isempty(SData) && strcmp(tagStr, 'item')
        % make sure that when we have a character array the IDX of the
        % new vector is set to 2 and not to the end+1 index of the string.
        if isa(SData, 'char')
            indexStr = 2;
        else
            indexStr = length(SData)+1;
        end
    end

    if isempty(indexStr)
        indexStr = 1;
    end

    % switch board which decides how to convert contents according to TYPE
    switch lower(typeStr)

        % ========================
        case '?xml'
            %do nothing
        case '!--'
            % comment, just ignore
            iElem = iElem+1;
            continue

            % ========================
        case {'logical', 'boolean'}
            % NOTE(review): str2num evaluates the element content
            c = logical(str2num(content)); %#ok<ST2NM>
            if isnEmpty
                c = reshape(c, sizeVec);
            end

            % ========================
        case {'char', 'string'}
            c = charFuncReSubstitute(content);
            if isempty(c) && (length(c) ~= nElems)
                % this is a string containing only spaces
                c = blanks(nElems);
            end
            %
            if isnEmpty
                c = reshape(c, sizeVec);
            end

            % ========================
        case {'struct' , 'parent'}
            c = xmlparse(content, attSwitch, struct(), level+1);

            if ~(nElems==1)
                c = reshape(c, sizeVec);
            end

            if isfield(c, 'item') && strcmp(typeStr, 'struct')
                c = {c.item};
            end

            % ========================
        case 'cell'
            tmp_c = xmlparse(content, attSwitch, {}, level+1);

            if isnEmpty
                tmp_c = reshape(tmp_c, sizeVec);
            end

            if ~isempty(tmp_c)
                if isfield(tmp_c, 'item')
                    c = {tmp_c.item};
                else
                    % otherwise leave as is.
                    c = tmp_c;
                end
            else
                c = {};
            end
            % ========================
            % NUMERIC TYPE
        otherwise
            % NOTE(review): typeStr is taken from the XML text and is
            % called as a function name here - only parse trusted input
            c = feval(typeStr,sscanf(content,'%f').');
            if isnEmpty
                c = reshape(c, sizeVec);
            end
    end

    % now c contains the content variable; the branches below decide where
    % it is stored depending on emptiness of SData, the index and the level

    if isempty(SData) && indexStr==1 && level==0
        if strcmp(tagStr, 'item')
            % s = '<item>aaa</item>'
            SData = {};
            SData(indexStr) = {c}; %#ok<AGROW>
        else
            % s = '<root>aaa</root>'
            SData = c;
        end

    elseif isempty(SData) && indexStr==1 && level>0
        if strcmp(tagStr, 'item')
            % s = '<root><item>bbb</item></root>'
            % s = '<root><item idx="1">a</item><item idx="2">b</item></root>'
            SData = {};
            SData(indexStr) = {c}; %#ok<AGROW>
        else
            % s = '<root><a>bbb</a></root>'
            SData(indexStr).(tagStr)=c;
        end

    elseif isempty(SData) && indexStr>1 && level==0
        % s = '<root idx="4">hello</root>'
        % s = '<item idx="4">hello</item>'
        SData = {};
        SData(indexStr) = {c}; %#ok<AGROW>

    elseif isempty(SData) && indexStr>1 && level>0
        % s = '<root><ch idx="4">aaaa</ch></root>'
        % s = '<item><ch idx="4">aaaa</ch></item>'
        if strcmp(tagStr, 'item')
            SData = {};
            SData(indexStr) = {c}; %#ok<AGROW>
        else
            SData(indexStr).(tagStr)=c;
        end

    elseif ~isempty(SData) && indexStr==1 && level==0
        % s = '<item idx="3">aaa</item><item idx="1">bbb</item>'
        if strcmp(tagStr, 'item')
            SData(indexStr) = {c};
        else
            if ~(nargin<3)
                % Example: a.b = 111; d = xmlparse(str, '', a);
                % this only works if both are structs and X is not empty
                if isempty(SData) || ~(isa(SData, 'struct') && isa(c, 'struct'))
                    SData = c;
                else
                    % transfer all fields from c to X
                    N = fieldnames(c);
                    for n=1:length(N)
                        SData(indexStr).(N{n})=c.(N{n});
                    end
                end
            else
                % s = '<root idx="3">aaa</root><root idx="1">bbb</root>'
                % s = '<root>aaa</root><root>bbb</root>'
                % s = '<a><b>444</b></a><a><b>555</b></a>'
                throwerror('wrongInput',...
                    ['XML string cannot have two ''root'' ',...
                    'entries at root level! \n',...
                    'Possible solution: Use ''item'' tags instead.']);
            end
        end

    elseif ~isempty(SData) && indexStr==1 && level>0

        if strcmp(tagStr, 'item')
            % s = '<root><item idx="2">bbb</item><item idx="1">ccc</item></root>'
            SData(indexStr) = {c};
        else
            % s = '<root><a idx="2">bbb</a><a idx="1">ccc</a></root>'
            SData(indexStr).(tagStr)=c;
        end
        % BUT:
        % s = '<root><a idx="2"><b>ccc</b></a><a idx="1">ccc</a></root>'
        % fails because struct a has different content!

    elseif ~isempty(SData) && indexStr>1 && level==0

        % s = '<item idx="1">a</item><item idx="2">b</item>'
        % s = '<item idx="1">a</item><item idx="2">b</item><item idx="3">c</item>'
        if isa(SData,'char')
            % s = '<item idx="1">a</item><item idx="2">b</item>'
            SData = {SData};
            %else (if not char) we would have eg the third entry as X
            %s = '<item idx="1">a</item><item idx="2">b</item><item idx="3">c</item>'
            %and do not need to take action
        end
        SData(indexStr) = {c};

    elseif ~isempty(SData) && indexStr>1 && level>0

        % s = '<root><item idx="1">a</item><item idx="2">b</item><item idx="3">c</item></root>'
        if strcmp(tagStr, 'item')
            if isa(SData,'char')
                % s = '<root><item idx="1">a</item><item idx="2">b</item></root>'
                SData = {SData};
            end
            SData(indexStr) = {c};
        else
            % s = '<root><a>bbb</a><a>ccc</a></root>'
            SData(indexStr).(tagStr)=c;
        end

    else

        disp('This case cannot be processed:')
        disp(['isempty(X) = ', num2str(isempty(SData))])
        disp(['class(X) = ', class(SData)])
        disp(['class(c) = ', class(c)])
        disp(['IDX = ', num2str(indexStr)])
        disp(['LEVEL = ', num2str(level)])
        disp('Please contact the author m.molinari@soton.ac.uk!');
    end

    clear c;
    iElem = iElem+1;

end
400 
401 
function inpStr = charFuncReSubstitute(inpStr)
% CHARFUNCRESUBSTITUTE replaces the five predefined XML entity references
% (&lt; &gt; &apos; &quot; &amp;) in inpStr by the characters they denote.
%
% Input:
%   regular:
%       inpStr: char[1,] - text possibly containing entity references
%
% Output:
%   inpStr: char[1,] - text with the entity references decoded
%
inpStr=strrep(inpStr,'&lt;','<');
inpStr=strrep(inpStr,'&gt;','>');
inpStr=strrep(inpStr,'&apos;','''');
inpStr=strrep(inpStr,'&quot;','"');
% '&amp;' has to be decoded last: decoding it first turns an escaped
% ampersand such as '&amp;lt;' into '&lt;', which the following
% replacements would then wrongly decode a second time into '<'
inpStr=strrep(inpStr,'&amp;','&');
%
409 
410 
function indVec = getFindStr(longStr, shortStr)
% GETFINDSTR returns the start indices of all occurrences of shortStr
% within longStr; the result is [] when longStr has fewer columns than
% shortStr, in which case strfind is not called at all.
indVec=[];
isPatternFitting = ~(size(longStr,2) < size(shortStr,2));
if isPatternFitting
    indVec = strfind(longStr,shortStr);
end
function throwerror(in msgTag, in varargin)
THROWERROR works similarly to the built-in ERROR function in the case when there are no output arguments but s...
function getFindStr(in longStr, in shortStr)
find positions of occurrences of string shortStr in longStr
function checkgen(in x, in typeSpec, in varargin)
CHECKGEN checks a generic condition provided by typeSpec string in the following format: 'isnumeric(x...
function ismember(in leftVec, in rightVec, in varargin)
ISMEMBER - ismember implementation for arrays of any type.
function xmlparse(in inpXmlString, in attSwitch, in SData, in level)
XMLPARSE parses XML string str and returns matlab variable/structure. This is a non-validating parser...