|
| 1 | + |
function errors = checkHyperlinks(mlxfile, varargin)

% checkHyperlinks - check if hyperlinks are valid
%
% Syntax:
% -------
%   errors = checkHyperlinks(mlxfile)
%   errors = checkHyperlinks(mlxfile, fullCheck = tf)
%   errors = checkHyperlinks(mlxfile, showInfo = tf)
%   errors = checkHyperlinks(mlxfile, fullCheck = tf, showInfo = tf)
%
% Input arguments:
% ----------------
%   mlxfile - a string scalar or character array. The name of the file.
%   tf      - true or false (must be a logical value). Default is true.
%
% Output argument:
% ----------------
%   errors - a string array or [].
%            If errors is non-empty, it contains the error messages.
%            An error message is of the following form:
%              "penny.mlx: File 'test.mlx' not found."
%            or
%              "penny.mlx: ID 'H_3E47FD11' not found."
%
% Description:
% ------------
%   checkHyperlinks(mlxfile)
%     Checks all hyperlinks in the file 'mlxfile' and in all .mlx files
%     referenced by 'mlxfile'. In addition, statistical information is
%     displayed on the screen.
%
%   checkHyperlinks(mlxfile, fullCheck = false)
%     By default, fullCheck is set to true which means that all .mlx files
%     referenced by 'mlxfile' are checked. To check only the hyperlinks in
%     the file 'mlxfile', set fullCheck to false.
%
%   checkHyperlinks(mlxfile, showInfo = false)
%     By default, showInfo is set to true which means that information is
%     displayed on the screen. Set showInfo to false to suppress the
%     display of such information.
%
%   checkHyperlinks(mlxfile) is equivalent to
%   checkHyperlinks(mlxfile, fullCheck = true, showInfo = true)
%
% Errors:
% -------
%   An error is raised if 'mlxfile' is not a character array or string
%   scalar, if the file cannot be found, or if an option fails validation.
%
% Examples:
% ---------
%   >> checkHyperlinks("penny.mlx");
%
%   >> checkHyperlinks("./demo/penny.mlx", fullCheck = false);
%
%   >> errors = checkHyperlinks("penny.mlx", showInfo = false);
%


%-------------------------------------------------------------------------
% Argument checking
%-------------------------------------------------------------------------
% Only scalar text is accepted: a multi-element string array would
% otherwise fail later inside exist() with an unrelated message.
if ~(ischar(mlxfile) || isStringScalar(mlxfile))
    error("First argument should be a string or character array.");
end

if ~exist(mlxfile, "file")
    error("Can't find file '%s'.", mlxfile);
end

p = inputParser;
addParameter(p, "showInfo", true, @islogical);
addParameter(p, "fullCheck", true, @islogical);
parse(p, varargin{:});

options = p.Results;

%-------------------------------------------------------------------------

try
    % Call main function. The three empty cells are the initial values of
    % checkAnchors, alreadyChecked and alreadyCheckedAnchor.
    errors = checkHyperlinksInternal(mlxfile, {}, {}, {}, options);
catch ME
    % Re-issue any internal failure from this entry point.
    throw(ME);
end

%-------------------------------------------------------------------------
% Summary
%-------------------------------------------------------------------------
if options.showInfo
    if isempty(errors)
        fprintf("\nAll hyperlinks are valid.\n\n");
    else
        n = numel(errors);
        if n == 1
            fprintf("\n1 hyperlink is not valid.\n\n");
        else
            fprintf("\n%d hyperlinks are not valid.\n\n", n);
        end
    end
end

end % checkHyperlinks
| 99 | + |
| 100 | +%------------------------------------------------------------------------- |
| 101 | + |
function [errors, alreadyChecked, alreadyCheckedAnchor] = ...
    checkHyperlinksInternal( ...
        mlxfile, checkAnchors, alreadyChecked, alreadyCheckedAnchor, options ...
    )
% checkHyperlinksInternal - recursive worker behind checkHyperlinks
%
% Input arguments:
%   mlxfile              - name of the .mlx file to process; it is resolved
%                          with normalize() first.
%   checkAnchors         - cell array of bookmark IDs. If non-empty, ONLY
%                          the existence of these IDs in 'mlxfile' is
%                          checked and the function returns.
%   alreadyChecked       - cell array of files whose hyperlinks have
%                          already been scanned (recursion guard).
%   alreadyCheckedAnchor - cell array of files whose anchors have already
%                          been checked (recursion guard).
%   options              - struct with the logical fields showInfo and
%                          fullCheck.
%
% Output arguments:
%   errors               - string array of error messages, or [].
%   alreadyChecked,
%   alreadyCheckedAnchor - the updated guard lists.

mlxfile = normalize(mlxfile);

if ~isempty(checkAnchors)
    %---------------------------------------------------------------------
    % Only check if anchors exist
    %---------------------------------------------------------------------
    if any(strcmp(alreadyCheckedAnchor, mlxfile))
        % mlxfile was already checked. Nothing to do.
        errors = [];
        return;
    end
    errors = checkAnchorsIDs(mlxfile, checkAnchors, options);
    % Remember the file to avoid infinite loops, because hyperlinks can
    % go back and forth between files.
    alreadyCheckedAnchor = unique([alreadyCheckedAnchor, {char(mlxfile)}]);
    return;
end

if any(strcmp(alreadyChecked, mlxfile))
    % mlxfile was already checked. Nothing to do.
    errors = [];
    return;
end

xml = readDocumentXML(mlxfile);

if options.showInfo
    fprintf("\nScanning %s ...\n", mlxfile);
end

errors = []; checkfiles = {}; checkmlxfiles = {};

% Attribute text of every <w:hyperlink ...> tag in the document.
links = extractBetween(xml, '<w:hyperlink', '>');

% Classify every tag once. Each extractBetween call below is applied only
% to tags that are known to contain the start delimiter it searches for.
anchorPrefix = 'w:anchor="internal:';
openPrefix   = 'w:docLocation="matlab:open(''';
openSuffix   = ''')"';
hasAnchor    = cellfun(@(s) contains(s, anchorPrefix), links);
hasDocLoc    = cellfun(@(s) contains(s, 'w:docLocation'), links);
isOpenLink   = cellfun(@(s) contains(s, openPrefix), links);

%-------------------------------------------------------------------------
% Check internal hyperlinks
%-------------------------------------------------------------------------
%
% Link  : <w:hyperlink w:anchor="internal:<id>">
% Anchor: <w:bookmarkStart w:id="<id>" ... />
%-------------------------------------------------------------------------
IDs = extractBetween(links(hasAnchor & ~hasDocLoc), anchorPrefix, '"');
if isempty(IDs)
    if options.showInfo
        fprintf(" No internal hyperlinks found.\n");
    end
else
    n = numel(IDs);
    if options.showInfo
        fprintf(" %d internal hyperlink(s) found.\n", n);
        fprintf(" Checking internal hyperlinks:\n");
    end
    err = false;
    bookmarks = extractBetween(xml, '<w:bookmarkStart', '/>');
    for i = 1:n
        bookmark = sprintf('w:id="%s"', IDs{i});
        if ~any(contains(bookmarks, bookmark))
            if options.showInfo
                fprintf(" Error: ID '%s' not found.\n", IDs{i});
            end
            errmsg = sprintf("%s: ID '%s' not found.", mlxfile, IDs{i});
            % Record the message exactly once.
            errors = addError(errors, errmsg);
            err = true;
        end
    end
    if options.showInfo && ~err
        fprintf(" Internal hyperlinks are valid.\n");
    end
end

%-------------------------------------------------------------------------
% Check hyperlinks to other files
%-------------------------------------------------------------------------
%
% Link: <w:hyperlink w:docLocation="matlab:open('<filename>')">
% Anchor: No anchor in <filename>. It's just the filename.
%-------------------------------------------------------------------------
files = extractBetween(links(~hasAnchor & isOpenLink), openPrefix, openSuffix);
if isempty(files)
    if options.showInfo
        fprintf(" No hyperlinks to other files found.\n");
    end
else
    n = numel(files);
    if options.showInfo
        fprintf(" %d hyperlink(s) to external file(s) found.\n", n);
        fprintf(" Checking hyperlinks:\n");
    end
    for i = 1:n
        [~, ~, ext] = fileparts(files{i});
        if strcmp(ext, ".mlx")
            checkmlxfiles{end+1} = files{i}; %#ok<AGROW>
        else
            checkfiles{end+1} = files{i}; %#ok<AGROW>
        end
    end
end

%-------------------------------------------------------------------------
% Check if referenced files - mlx and non-mlx files - exist.
%-------------------------------------------------------------------------
allfiles = union(checkmlxfiles, checkfiles);
err = false; n = numel(allfiles); fileNotFound = {};
for i = 1:n
    errmsg = existFile(allfiles{i}, mlxfile, options.showInfo);
    errors = addError(errors, errmsg);
    if ~isempty(errmsg)
        err = true;
        fileNotFound{end+1} = allfiles{i}; %#ok<AGROW>
    end
end
if n > 0 && options.showInfo && ~err
    fprintf(" External files exist, therefore hyperlinks are valid.\n");
end

%-------------------------------------------------------------------------
% Check hyperlinks to a location in another .mlx file
%-------------------------------------------------------------------------
%
% Link  : <w:hyperlink w:anchor="internal:<id>"
%                      w:docLocation="matlab:open('<filename>')">
% In the file <filename>:
% Anchor: <w:bookmarkStart w:id="<id>" ... />
%-------------------------------------------------------------------------
remoteLinks = links(hasAnchor & isOpenLink);
files   = extractBetween(remoteLinks, openPrefix, openSuffix);
anchors = extractBetween(remoteLinks, anchorPrefix, '"');
if isempty(files)
    if options.showInfo
        fprintf(" No hyperlinks to a location in another .mlx file found.\n");
    end
else
    n = numel(files);
    if options.showInfo
        fprintf(" %d hyperlink(s) to a location in another .mlx file found.\n", n);
        fprintf(" Checking hyperlinks:\n");
    end
    err = false;
    % targetFiles{k} and targetAnchors{k} always belong together; neither
    % list is sorted or filtered after it has been built.
    targetFiles = {}; targetAnchors = {}; missingTargets = {};
    for i = 1:n
        file = normalize(files{i});
        if isempty(file)
            % The target could not be resolved. Report it (once per file)
            % instead of recursing into a file that cannot be opened.
            if ~any(strcmp(missingTargets, files{i}))
                missingTargets{end+1} = files{i}; %#ok<AGROW>
                if options.showInfo
                    fprintf(" Error: File '%s' not found.\n", files{i});
                end
                errmsg = sprintf("%s: File '%s' not found.", mlxfile, files{i});
                errors = addError(errors, errmsg);
            end
            err = true;
            continue;
        end
        % Exact comparison: one path being a substring of another must
        % not merge two different target files.
        idx = find(strcmp(targetFiles, file), 1);
        if isempty(idx)
            targetFiles{end+1}   = file;        %#ok<AGROW>
            targetAnchors{end+1} = anchors(i);  %#ok<AGROW>
        else
            targetAnchors{idx} = [targetAnchors{idx}, anchors(i)];
        end
    end
    % Every target file occurs only once in targetFiles, so the guard list
    % is restarted here: the anchors referenced by THIS file are then
    % checked even if another file already checked the same target.
    alreadyCheckedAnchor = {};
    for i = 1:numel(targetFiles)
        [errmsg, alreadyChecked, alreadyCheckedAnchor] = ...
            checkHyperlinksInternal( ...
                targetFiles{i}, targetAnchors{i}, alreadyChecked, ...
                alreadyCheckedAnchor, options ...
            );
        if ~isempty(errmsg)
            err = true;
            errors = addError(errors, errmsg);
        end
    end
    if options.showInfo && ~err
        fprintf(" External hyperlinks are valid.\n");
    end
end

%-------------------------------------------------------------------------
if ~options.fullCheck
    return
end

%------------------------------------------------------------------------
% Now do a check of all referenced mlx files.
%------------------------------------------------------------------------

% The following statements are needed to avoid infinite loops,
% because hyperlinks can go back and forth between files.
alreadyChecked = unique([alreadyChecked, {char(mlxfile)}]);
checkmlxfiles = setdiff(checkmlxfiles, alreadyChecked);
checkmlxfiles = setdiff(checkmlxfiles, fileNotFound);

for i = 1:numel(checkmlxfiles)
    [errmsg, alreadyChecked, alreadyCheckedAnchor] = ...
        checkHyperlinksInternal( ...
            checkmlxfiles{i}, {}, alreadyChecked, alreadyCheckedAnchor, options ...
        );
    errors = addError(errors, errmsg);
end

end % checkHyperlinksInternal
| 313 | + |
| 314 | +%------------------------------------------------------------------------- |
function text = readDocumentXML(mlxfile)
% readDocumentXML - return the content of matlab/document.xml of a .mlx file
%
% The .mlx file is unzipped into a temporary folder, the file
% <folder>/matlab/document.xml is read as text, and the temporary folder
% is removed again when this function exits.
%
% Input argument:
%   mlxfile - name of the file; its extension must be .mlx
%
% Output argument:
%   text - the content of document.xml as returned by fileread
%
% Errors:
%   - if the extension of 'mlxfile' is not .mlx
%   - if the file cannot be unzipped
%   - if document.xml cannot be read (error raised by fileread)

[~, ~, ext] = fileparts(mlxfile);
if ~strcmp(ext, ".mlx")
    error("First argument should be a filename with the extension .mlx");
end

% Use a unique scratch folder. A folder named after the file itself could
% coincide with an unrelated folder in tempdir, which would then be
% polluted by unzip and deleted by the cleanup.
folder = tempname;

% Register the cleanup BEFORE unzipping so that a partially extracted
% archive is removed as well.
cleanup = onCleanup(@() rmDir(folder)); %#ok<NASGU>

% Try to unzip the mlxfile
try
    unzip(mlxfile, folder);
catch
    error("'%s' is not a Live Code file or a corrupted Live Code file.", mlxfile);
end

% Read the entire content of <folder>/matlab/document.xml
text = fileread(fullfile(folder, "matlab", "document.xml"));

end % readDocumentXML
| 340 | + |
| 341 | +%------------------------------------------------------------------------- |
| 342 | + |
function rmDir(folder)
% rmDir - remove 'folder' together with its content, if it exists.
% Does nothing when 'folder' is not an existing folder.
if ~exist(folder, "dir")
    return;
end
rmdir(folder, "s");
end % rmDir
| 348 | + |
| 349 | +%------------------------------------------------------------------------- |
| 350 | + |
function errors = addError(errors, errmsg)
% addError - append 'errmsg' to the list 'errors'.
%
% An empty 'errmsg' leaves 'errors' unchanged. If 'errors' is empty, the
% result is 'errmsg' itself; otherwise 'errmsg' is concatenated
% horizontally to 'errors'.
if isempty(errmsg)
    return;
end

if isempty(errors)
    errors = errmsg;
else
    errors = [errors, errmsg];
end
end % addError
| 360 | + |
| 361 | +%------------------------------------------------------------------------- |
| 362 | + |
function errmsg = existFile(file, mlxfile, showInfo)
% existFile - build an error message if 'file' does not exist.
%
% Input arguments:
%   file     - name of the file to look for (tested with exist(...,"file"))
%   mlxfile  - name of the file that references 'file'; used as the
%              prefix of the error message
%   showInfo - if true, the problem is also printed on the screen
%
% Output argument:
%   errmsg - [] if 'file' exists, otherwise the error message
errmsg = [];
if exist(file, "file")
    return;
end

if showInfo
    fprintf(" Error: File '%s' not found.\n", file);
end
errmsg = sprintf("%s: File '%s' not found.", mlxfile, file);
end % existFile
| 373 | + |
| 374 | +%------------------------------------------------------------------------- |
| 375 | + |
function filename = normalize(filename)
% normalize - replace 'filename' by the location that which() reports.
% The result is whatever which(filename) returns; callers use it as the
% canonical name when comparing files.
resolved = which(filename);
filename = resolved;
end % normalize
| 379 | + |
| 380 | +%------------------------------------------------------------------------- |
| 381 | + |
function errors = checkAnchorsIDs(mlxfile, IDs, options)
% checkAnchorsIDs - check that the bookmark IDs exist in 'mlxfile'
%
% Input arguments:
%   mlxfile - name of the .mlx file that should contain the bookmarks
%   IDs     - cell array of bookmark IDs to look for
%   options - struct; if options.showInfo is true, each missing ID is
%             also printed on the screen
%
% Output argument:
%   errors - [] if every ID was found, otherwise a string array with one
%            message of the form "<mlxfile>: ID '<id>' not found." per
%            missing ID
text = readDocumentXML(mlxfile);
errors = [];
% Attribute text of every <w:bookmarkStart ... /> tag in the document.
bookmarks = extractBetween(text, '<w:bookmarkStart', '/>');
for i = 1:numel(IDs)
    bookmark = sprintf('w:id="%s"', IDs{i});
    if ~any(contains(bookmarks, bookmark))
        if options.showInfo
            fprintf(" Error: ID '%s' not found.\n", IDs{i});
        end
        errmsg = sprintf("%s: ID '%s' not found.", mlxfile, IDs{i});
        % Record the message exactly once.
        errors = addError(errors, errmsg);
    end
end
end % checkAnchorsIDs
| 398 | + |
0 commit comments