1 ############################################################################## 2 # @file ExternalData.cmake 3 # @brief Manage data files stored outside the source tree. 5 # Copyright 2010-2011 Kitware, Inc. All rights reserved. 6 # File modified by Andreas Schuh. 9 ############################################################################## 11 # - Manage data files stored outside source tree 12 # Use this module to unambiguously reference data files stored outside the 13 # source tree and fetch them at build time from arbitrary local and remote 14 # content-addressed locations. Functions provided by this module recognize 15 # arguments with the syntax "DATA{<name>}" as references to external data, 16 # replace them with full paths to local copies of those data, and create build 17 # rules to fetch and update the local copies. 19 # The DATA{} syntax is literal and the <name> is a full or relative path 20 # within the source tree. The source tree must contain either a real data 21 # file at <name> or a "content link" at <name><ext> containing a hash of the 22 # real file using a hash algorithm corresponding to <ext>. For example, the 23 # argument "DATA{img.png}" may be satisfied by either a real "img.png" file in 24 # the current source directory or a "img.png.md5" file containing its MD5 sum. 26 # The 'ExternalData_Expand_Arguments' function evaluates DATA{} references 27 # in its arguments and constructs a new list of arguments: 28 # ExternalData_Expand_Arguments( 29 # <target> # Name of data management target 30 # <outVar> # Output variable 31 # [args...] # Input arguments, DATA{} allowed 33 # It replaces each DATA{} reference argument with the full path of a real 34 # data file on disk that will exist after the <target> builds. 36 # The 'ExternalData_Add_Test' function wraps around the CMake add_test() 37 # command but supports DATA{} reference arguments: 38 # ExternalData_Add_Test( 39 # <target> # Name of data management target 40 # ... 
# Arguments of add_test(), DATA{} allowed 42 # It passes its arguments through ExternalData_Expand_Arguments and then 43 # invokes add_test() using the results. 45 # The 'ExternalData_Add_Target' function creates a custom target to manage 46 # local instances of data files stored externally: 47 # ExternalData_Add_Target( 48 # <target> # Name of data management target 50 # It creates custom commands in the target as necessary to make data files 51 # available for each DATA{} reference previously evaluated by other functions 52 # provided by this module. A list of URL templates must be provided in the 53 # variable ExternalData_URL_TEMPLATES using the placeholders "%(algo)" and 54 # "%(hash)" in each template. Data fetch rules try each URL template in order 55 # by substituting the hash algorithm name for "%(algo)" and the hash value for 56 # "%(hash)". 58 # The following hash algorithms are supported: 59 # %(algo) <ext> Description 60 # ------- ----- ----------- 61 # MD5 .md5 Message-Digest Algorithm 5, RFC 1321 62 # Note that the hashes are used only for unique data identification and 63 # download verification. This is not security software. 66 # include(ExternalData) 67 # set(ExternalData_URL_TEMPLATES "file:///local/%(algo)/%(hash)" 68 # "http://data.org/%(algo)/%(hash)")
69 # ExternalData_Add_Test(MyData 71 # COMMAND MyExe DATA{MyInput.png} 73 # ExternalData_Add_Target(MyData) 74 # When test "MyTest" runs the "DATA{MyInput.png}" argument will be replaced by 75 # the full path to a real instance of the data file "MyInput.png" on disk. If 76 # the source tree contains a content link such as "MyInput.png.md5" then the 77 # "MyData" target creates a real "MyInput.png" in the build tree. 79 # The DATA{} syntax can automatically recognize and fetch a file series. If 80 # the source tree contains a group of files or content links named like a 81 # series then a DATA{} reference to one member adds rules to fetch all of 82 # them. Although all members of a series are fetched, only the file 83 # originally named by the DATA{} argument is substituted for it. Two 84 # variables configure recognition of a series from DATA{<name>}. First, 85 # ExternalData_SERIES_PARSE is a regex of the form "^(...)(...)(...)$" to 86 # parse <prefix>, <number>, and <suffix> parts from <name>. Second, 87 # ExternalData_SERIES_MATCH is a regex matching the <number> part of series 88 # members named <prefix><number><suffix>. Note that the <suffix> of a series 89 # does not include a hash-algorithm extension. Both series configuration 90 # variables have default values that work well for common cases. 92 # The variable ExternalData_LINK_CONTENT may be set to the name of a supported 93 # hash algorithm to enable automatic conversion of real data files referenced 94 # by the DATA{} syntax into content links. For each such <file> a content 95 # link named "<file><ext>" is created. The original file is renamed to the 96 # form ".ExternalData_<algo>_<hash>" to stage it for future transmission to 97 # one of the locations in the list of URL templates (by means outside the 98 # scope of this module). The data fetch rule created for the content link 99 # will use the staged object if it cannot be found using any URL template. 
101 # The variable ExternalData_SOURCE_ROOT may be set to the highest source 102 # directory containing any path named by a DATA{} reference. The default is 103 # CMAKE_SOURCE_DIR. ExternalData_SOURCE_ROOT and CMAKE_SOURCE_DIR must refer 104 # to directories within a single source distribution (e.g. they come together 105 # in one tarball). 107 #============================================================================= 108 # Copyright 2010-2011 Kitware, Inc. 109 # All rights reserved. 111 # Redistribution and use in source and binary forms, with or without 112 # modification, are permitted provided that the following conditions 113 # are met: 115 # * Redistributions of source code must retain the above copyright 116 # notice, this list of conditions and the following disclaimer. 118 # * Redistributions in binary form must reproduce the above copyright 119 # notice, this list of conditions and the following disclaimer in the 120 # documentation and/or other materials provided with the distribution. 122 # * Neither the names of Kitware, Inc., the Insight Software Consortium, 123 # nor the names of their contributors may be used to endorse or promote 124 # products derived from this software without specific prior written 125 # permission. 127 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 128 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 129 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 130 # A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT 131 # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 132 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 133 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 134 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 135 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 136 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 137 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 138 #============================================================================= 140 ############################################################################## 141 # @brief @todo Document function. 143 # @param [in] target Name of the test. 147 add_test(${testArgs})
150 ############################################################################## 151 # @brief @todo Document function. 153 # @param [in] target Name of the external data target. 156 if(NOT ExternalData_URL_TEMPLATES)
157 message(FATAL_ERROR
"ExternalData_URL_TEMPLATES is not set!")
159 set(config ${CMAKE_CURRENT_BINARY_DIR}/${target}_config.cmake)
160 configure_file(${_ExternalData_SELF_DIR}/ExternalData_config.cmake.in ${config} @ONLY)
164 # Set "_ExternalData_FILE_${file}" for each output file to avoid duplicate 165 # rules. Use local data first to prefer real files over content links. 167 # Custom commands to copy or link local data. 168 get_property(data_local GLOBAL PROPERTY _ExternalData_${target}_LOCAL)
169 foreach(entry IN LISTS data_local)
170 string(REPLACE
"|" ";" tuple
"${entry}")
171 list(GET tuple 0 file)
172 list(GET tuple 1 name)
173 if(NOT DEFINED
"_ExternalData_FILE_${file}")
174 set(
"_ExternalData_FILE_${file}" 1)
176 COMMENT
"Generating ${file}" 178 COMMAND ${CMAKE_COMMAND} -Drelative_top=${CMAKE_BINARY_DIR}
179 -Dfile=${file} -Dname=${name}
180 -DExternalData_ACTION=local
181 -DExternalData_CONFIG=${config}
185 list(APPEND files
"${file}")
189 # Custom commands to fetch remote data. 190 get_property(data_fetch GLOBAL PROPERTY _ExternalData_${target}_FETCH)
191 foreach(entry IN LISTS data_fetch)
192 string(REPLACE
"|" ";" tuple
"${entry}")
193 list(GET tuple 0 file)
194 list(GET tuple 1 name)
195 list(GET tuple 2 ext)
196 if(NOT DEFINED
"_ExternalData_FILE_${file}")
197 set(
"_ExternalData_FILE_${file}" 1)
199 # Users care about the data file, so hide the hash/timestamp file.
200 COMMENT
"Generating ${file}" 201 # The hash/timestamp file
is the output from the build perspective.
202 # List the real file as a second output in
case it
is a broken link.
203 # The files must be listed in
this order so CMake can hide from the
204 # make tool that a symlink target may not be newer than the input.
205 OUTPUT
"${file}${ext}" "${file}" 206 # Run the data fetch/update script.
207 COMMAND ${CMAKE_COMMAND} -DExternalData_OBJECT_DIR=${CMAKE_BINARY_DIR}/ExternalData/Objects
208 -Drelative_top=${CMAKE_BINARY_DIR}
209 -Dfile=${file} -Dname=${name} -Dext=${ext}
210 -DExternalData_ACTION=fetch
211 -DExternalData_CONFIG=${config}
213 # Update whenever the object hash changes. 214 DEPENDS
"${name}${ext}" 216 list(APPEND files
"${file}${ext}")
220 # Custom target to drive all update commands. 221 add_custom_target(${target} ALL DEPENDS ${files})
224 ############################################################################## 225 # @brief Replace DATA{} references with real arguments. 227 # @param [in] target Name of the external data target. 228 # @param [out] outArgsVar List of expanded arguments. 231 # Replace DATA{} references with real arguments.
232 set(data_regex
"^xDATA{([^{}\r\n]*)}$")
234 foreach(arg IN LISTS ARGN)
235 if(
"x${arg}" MATCHES
"${data_regex}")
236 string(REGEX REPLACE
"${data_regex}" "\\1" data
"x${arg}")
237 _ExternalData_arg(
"${target}" "${arg}" "${data}" file)
238 list(APPEND outArgs "${file}
") 240 list(APPEND outArgs "${arg}
") 243 set("${outArgsVar}
" "${outArgs}
" PARENT_SCOPE) 246 #----------------------------------------------------------------------------- 247 # Private helper interface 249 set(_ExternalData_SELF "${CMAKE_CURRENT_LIST_FILE}
") 252 function(_ExternalData_compute_hash var_hash algo file) 253 if("${
algo}
" STREQUAL "MD5
") 255 execute_process(COMMAND "${CMAKE_COMMAND}
" -E md5sum "${file}
" 256 OUTPUT_VARIABLE output) 257 string(SUBSTRING ${output} 0 32 hash) 258 set("${var_hash}
" "${hash}
" PARENT_SCOPE) 260 # TODO: Other hashes. 261 message(FATAL_ERROR "Hash algorithm ${
algo} unimplemented.
") 265 function(_ExternalData_atomic_write file content) 266 string(RANDOM LENGTH 6 random) 267 set(tmp "${file}.tmp${random}
") 268 file(WRITE "${tmp}
" "${content}
") 269 file(RENAME "${tmp}
" "${file}
") 272 function(_ExternalData_link_content name var_ext) 273 if("${ExternalData_LINK_CONTENT}
" MATCHES "^(MD5)$
") 274 set(algo "${ExternalData_LINK_CONTENT}
") 277 "Unknown hash algorithm specified by ExternalData_LINK_CONTENT:\n
" 278 " ${ExternalData_LINK_CONTENT}
") 280 _ExternalData_compute_hash(hash "${
algo}
" "${name}
") 281 get_filename_component(dir "${name}
" PATH) 282 set(staged "${dir}/.ExternalData_${
algo}_${hash}
") 284 _ExternalData_atomic_write("${name}${ext}
" "${hash}\n
") 285 file(RENAME "${name}
" "${staged}
") 286 set("${var_ext}
" "${ext}
" PARENT_SCOPE) 288 file(RELATIVE_PATH relname "${ExternalData_SOURCE_ROOT}
" "${name}${ext}
") 289 message(STATUS "Linked ${relname} to ExternalData ${
algo}/${hash}
") 292 function(_ExternalData_arg target arg data var_file) 293 # Convert to full path. 294 if(IS_ABSOLUTE "${data}
") 295 set(absdata "${data}
") 297 # TODO: If ${data} does not start in "./
" or "../
" then use search path? 298 get_filename_component(absdata "${CMAKE_CURRENT_SOURCE_DIR}/${data}
" ABSOLUTE) 301 # Convert to relative path under the source tree. 302 if(NOT ExternalData_SOURCE_ROOT) 303 set(ExternalData_SOURCE_ROOT "${CMAKE_SOURCE_DIR}
") 305 set(top_src "${ExternalData_SOURCE_ROOT}
") 306 file(RELATIVE_PATH reldata "${top_src}
" "${absdata}
") 307 if(IS_ABSOLUTE "${reldata}
" OR "${reldata}
" MATCHES "^\\.\\./
") 308 message(FATAL_ERROR "Data file referenced by argument\n
" 310 "does not lie under the top-level source directory\n
" 313 set(top_bin "${CMAKE_BINARY_DIR}/ExternalData
") # TODO: .../${target} ? 315 # Configure series parsing and matching. 316 if(ExternalData_SERIES_PARSE) 317 if(NOT "${ExternalData_SERIES_PARSE}
" MATCHES 318 "^\\^\\([^()]*\\)\\([^()]*\\)\\([^()]*\\)\\$$
") 320 "ExternalData_SERIES_PARSE
is set to\n
" 321 " ${ExternalData_SERIES_PARSE}\n
" 323 " ^(...)(...)(...)$\n
") 325 set(series_parse "${ExternalData_SERIES_PARSE}
") 327 set(series_parse "^(.*[A-Za-z_.-])([0-9]*)(\\.[^.]*)$
") 329 if(ExternalData_SERIES_MATCH) 330 set(series_match "${ExternalData_SERIES_MATCH}
") 332 set(series_match "[_.]?[0-9]*
") 335 # Parse the base, number, and extension components of the series. 336 string(REGEX REPLACE "${series_parse}
" "\\1;\\2;\\3
" tuple "${reldata}
") 337 list(LENGTH tuple len) 338 if(NOT "${len}
" EQUAL 3) 339 message(FATAL_ERROR "Data file referenced by argument\n
" 341 "corresponds to path\n
" 343 "that does not
match regular expression\n
" 347 # Glob files that might match the series. 348 list(GET tuple 0 relbase) 349 list(GET tuple 2 ext) 350 set(pattern "${relbase}*${ext}*
") 351 file(GLOB globbed RELATIVE "${top_src}
" "${top_src}/${pattern}
") 353 # Match base, number, and extension perhaps followed by a hash ext. 354 string(REGEX REPLACE "([][+.*()^])
" "\\\\\\1
" series_base "${relbase}
") 355 string(REGEX REPLACE "([][+.*()^])
" "\\\\\\1
" series_ext "${ext}
") 356 set(series_regex "^(${series_base}${series_match}${series_ext})(\\.[^.]*|)$
") 357 set(external "") # Entries external to the source tree. 358 set(internal "") # Entries internal to the source tree. 360 foreach(entry IN LISTS globbed) 361 string(REGEX REPLACE "${series_regex}
" "\\1;\\2
" tuple "${entry}
") 362 list(LENGTH tuple len) 364 list(GET tuple 0 relname) 365 list(GET tuple 1 alg) 366 set(name "${top_src}/${relname}
") 367 set(file "${top_bin}/${relname}
") 369 list(APPEND external "${file}|${name}|${alg}
") 370 elseif(ExternalData_LINK_CONTENT) 371 _ExternalData_link_content("${name}
" alg) 372 list(APPEND external "${file}|${name}|${alg}
") 374 list(APPEND internal "${file}|${name}
") 376 if("${relname}
" STREQUAL "${reldata}
") 382 if(NOT have_original) 383 message(FATAL_ERROR "Data file referenced by argument\n
" 385 "corresponds to source tree path\n
" 387 "that does not exist (with or without an extension)!
") 391 # Make the series available in the build tree. 392 set_property(GLOBAL APPEND PROPERTY 393 _ExternalData_${target}_FETCH "${external}
") 394 set_property(GLOBAL APPEND PROPERTY 395 _ExternalData_${target}_LOCAL "${
internal}
") 396 set("${var_file}
" "${top_bin}/${reldata}
" PARENT_SCOPE) 398 # The whole series is in the source tree. 399 set("${var_file}
" "${top_src}/${reldata}
" PARENT_SCOPE) 403 #----------------------------------------------------------------------------- 404 # Private script mode interface 406 if(CMAKE_GENERATOR OR NOT ExternalData_ACTION) 410 if(ExternalData_CONFIG) 411 include(${ExternalData_CONFIG}) 413 if(NOT ExternalData_URL_TEMPLATES) 414 message(FATAL_ERROR "No ExternalData_URL_TEMPLATES
set!
") 417 function(_ExternalData_link_or_copy src dst) 418 # Create a temporary file first. 419 get_filename_component(dst_dir "${dst}
" PATH) 420 file(MAKE_DIRECTORY "${dst_dir}
") 421 string(RANDOM LENGTH 6 random) 422 set(tmp "${dst}.tmp${random}
") 424 # Create a symbolic link. 427 # Use relative path if files are close enough. 428 file(RELATIVE_PATH relsrc "${relative_top}
" "${src}
") 429 file(RELATIVE_PATH relfile "${relative_top}
" "${dst}
") 430 if(NOT IS_ABSOLUTE "${relsrc}
" AND NOT "${relsrc}
" MATCHES "^\\.\\./
" AND 431 NOT IS_ABSOLUTE "${reldst}
" AND NOT "${reldst}
" MATCHES "^\\.\\./
") 432 file(RELATIVE_PATH tgt "${dst_dir}
" "${src}
") 435 execute_process(COMMAND "${CMAKE_COMMAND}
" -E create_symlink "${tgt}
" "${tmp}
" RESULT_VARIABLE result) 438 execute_process(COMMAND "${CMAKE_COMMAND}
" -E copy "${src}
" "${tmp}
" RESULT_VARIABLE result) 441 file(REMOVE "${tmp}
") 442 message(FATAL_ERROR "Failed to create\n ${tmp}\nfrom\n ${obj}
") 445 # Atomically create/replace the real destination. 446 file(RENAME "${tmp}
" "${dst}
") 449 function(_ExternalData_download_object name hash algo var_obj) 450 set(obj "${ExternalData_OBJECT_DIR}/${
algo}/${hash}
") 452 message(STATUS "Found
object: \
"${obj}\"")
453 set(
"${var_obj}" "${obj}" PARENT_SCOPE)
457 string(RANDOM LENGTH 6 random)
458 set(tmp
"${obj}.tmp${random}")
461 foreach(url_template IN LISTS ExternalData_URL_TEMPLATES)
462 string(REPLACE
"%(hash)" "${hash}" url_tmp
"${url_template}")
463 string(REPLACE
"%(algo)" "${algo}" url
"${url_tmp}")
464 message(STATUS
"Fetching \"${url}\"")
465 file(DOWNLOAD
"${url}" "${tmp}" STATUS status SHOW_PROGRESS)
# TODO: timeout 466 set(tried
"${tried}\n ${url}")
467 list(GET status 0 err)
469 list(GET status 1 errMsg)
470 set(tried
"${tried} (${errMsg})")
472 # Verify downloaded object. 473 _ExternalData_compute_hash(dl_hash
"${algo}" "${tmp}")
474 if(
"${dl_hash}" STREQUAL
"${hash}")
478 set(tried
"${tried} (wrong hash ${algo}=${dl_hash})")
481 file(REMOVE
"${tmp}")
485 set(staged
"${dir}/.ExternalData_${algo}_${hash}")
488 file(RENAME
"${tmp}" "${obj}")
489 message(STATUS
"Downloaded object: \"${obj}\"")
490 elseif(EXISTS
"${staged}")
492 message(STATUS
"Staged object: \"${obj}\"")
494 message(FATAL_ERROR
"Object ${algo}=${hash} not found at:${tried}")
497 set(
"${var_obj}" "${obj}" PARENT_SCOPE)
500 if(
"${ExternalData_ACTION}" STREQUAL
"fetch")
501 foreach(v ExternalData_OBJECT_DIR file name ext)
502 if(NOT DEFINED
"${v}")
503 message(FATAL_ERROR
"No \"-D${v}=\" value provided!")
507 file(READ
"${name}${ext}" hash)
508 string(STRIP
"${hash}" hash)
510 if(
"${ext}" STREQUAL
".md5")
513 message(FATAL_ERROR
"Unknown hash algorithm extension \"${ext}\"")
516 _ExternalData_download_object("${name}
" "${hash}
" "${
algo}
" obj) 518 # Check if file already corresponds to the object. 519 set(file_up_to_date 0) 520 if(EXISTS "${file}
" AND EXISTS "${file}${ext}
") 521 file(READ "${file}${ext}
" f_hash) 522 string(STRIP "${f_hash}
" f_hash) 523 if("${f_hash}
" STREQUAL "${hash}
") 524 #message(STATUS "File already corresponds to
object") 525 set(file_up_to_date 1) 530 # Touch the file to convince the build system it is up to date. 531 execute_process(COMMAND "${CMAKE_COMMAND}
" -E touch "${file}
") 533 _ExternalData_link_or_copy("${obj}
" "${file}
") 536 # Atomically update the hash/timestamp file to record the object referenced. 537 _ExternalData_atomic_write("${file}${ext}
" "${hash}\n
") 538 elseif("${ExternalData_ACTION}
" STREQUAL "local
") 540 if(NOT DEFINED "${v}
") 541 message(FATAL_ERROR "No \
"-D${v}=\" value provided!")
544 _ExternalData_link_or_copy(
"${name}" "${file}")
545 elseif(
"${ExternalData_ACTION}" STREQUAL
"store")
547 if(NOT DEFINED
"${v}")
548 message(FATAL_ERROR
"No \"-D${v}=\" value provided!")
554 _ExternalData_compute_hash(hash
"${algo}" "${file}")
556 message(FATAL_ERROR
"Unknnown ExternalData_ACTION=[${ExternalData_ACTION}]")
function ExternalData_add_test(in target)
function is(in result, in expected, in name)
Test whether a given result is equal to the expected result.
def which(command, path=None, verbose=0, exts=None)
function ExternalData_expand_arguments(in target, out outArgsVar)
Replace DATA{} references with real arguments.
function ExternalData_add_target(in target)
function match(in value, in pattern)
This function implements a more portable way to do pattern matching.
function get_filename_component(inout ARGN)
Fixes CMake's get_filename_component() command.