本文详细介绍 Call Caching 的使用方法,包括如何开启和关闭该功能,以及如何通过查看元数据来确认 Call Caching 未生效的原因。
如果要使用 Cromwell 的 Call Caching 功能,需要在 Server 的配置文件中设置:
call-caching {
# Allows re-use of existing results for jobs you have already run
# (default: false)
enabled = true
# Whether to invalidate a cache result forever if we cannot reuse it. Disable this if you expect some cache copies
# to fail for external reasons which should not invalidate the cache (e.g. auth differences between users):
# (default: true)
invalidate-bad-cache-results = true
}
call-caching.enabled 是 Call Caching 功能的开关,可以按照自己的需求开启和关闭。
在全局开启 Call Caching 功能的前提下,提交工作流时可以通过如下两个 option 设置本次执行是否使用 Call Caching(write_to_cache 表示是否将本次执行结果写入缓存,read_from_cache 表示是否读取并复用已有的缓存结果):
{
"write_to_cache": true,
"read_from_cache": true
}
工作流执行时,每一个 task 的每一个 call(对应批量计算的一个作业)都会生成 metadata,其中记录了该步骤的运行过程,也包括 Call Caching 的详细信息。通过下面的命令可以查询一个工作流的 metadata:
widdler query -m [WorkflowId]
在元数据信息中找到对应的 task 的详细信息,比如:
{
"callRoot": "oss://gene-test/cromwell_test/GATK4_VariantDiscovery_pipeline_hg38/53cfd3fc-e9d5-4431-83ec-be6c51ab9365/call-HaplotypeCaller/shard-10",
"inputs": {
"gatk_path": "/gatk/gatk",
"ref_fasta": "oss://genomics-public-data-shanghai/broad-references/hg38/v0/Homo_sapiens_assembly38.fasta",
"cluster_config": "OnDemand ecs.sn2ne.xlarge img-ubuntu-vpc",
"input_bam_index": "oss://gene-test/cromwell_test/GATK4_VariantDiscovery_pipeline_hg38/cf55a2d1-572c-4490-8edf-07656802a79b/call-GatherBamFiles/NA12878.hg38.ready.bam.bai",
"output_filename": "NA12878.hg38.vcf.gz",
"contamination": null,
"ref_fasta_index": "oss://genomics-public-data-shanghai/broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai",
"ref_dict": "oss://genomics-public-data-shanghai/broad-references/hg38/v0/Homo_sapiens_assembly38.dict",
"interval_list": "/home/data/GATK_human_genome_resource_bundle/hg38_from_GCP/hg38_wgs_scattered_calling_intervals/temp_0047_of_50/scattered.interval_list",
"input_bam": "oss://gene-test/cromwell_test/GATK4_VariantDiscovery_pipeline_hg38/cf55a2d1-572c-4490-8edf-07656802a79b/call-GatherBamFiles/NA12878.hg38.ready.bam.bam",
"docker_image": "registry.cn-shanghai.aliyuncs.com/wgs_poc/poc:4.0.10.1"
},
"returnCode": 0,
"callCaching": {
"allowResultReuse": true,
"hashes": {
"output expression": {
"File output_vcf_index": "A162250CB6F52CC32CB75F5C5793E8BB",
"File output_vcf": "7FD061EEA1D3C63912D7B5FB1F3C5218"
},
"runtime attribute": {
"userData": "N/A",
"docker": "F323AFFA030FBB5B352C60BD7D615255",
"failOnStderr": "68934A3E9455FA72420237EB05902327",
"imageId": "N/A",
"continueOnReturnCode": "CFCD208495D565EF66E7DFF9F98764DA"
},
"output count": "C81E728D9D4C2F636F067F89CC14862C",
"input count": "D3D9446802A44259755D38E6D163E820",
"command template": "9104DF40289AB292A52C2A753FBF58D2",
"input": {
"File interval_list": "04dc2cb895d13a40657d5e2aa7d31e8c",
"String output_filename": "2B77B986117FC94D088273AD4D592964",
"File ref_fasta": "9A513FB0533F04ED87AE9CB6281DC19B-400",
"File input_bam_index": "D7CA83047E1B6B8269DF095F637621FE-1",
"String gatk_path": "EB83BBB666B0660B076106408FFC0A9B",
"String docker_image": "0981A914F6271269D58AA49FD18A6C13",
"String cluster_config": "B4563EC1789E5EB82B3076D362E6D88F",
"File ref_dict": "3884C62EB0E53FA92459ED9BFF133AE6",
"File input_bam": "9C0AC9A52F5640AA06A0EBCE6A97DF51-301",
"File ref_fasta_index": "F76371B113734A56CDE236BC0372DE0A"
},
"backend name": "AE9178757DD2A29CF80C1F5B9F34882E"
},
"effectiveCallCachingMode": "ReadAndWriteCache",
"hit": false,
"result": "Cache Miss"
},
"stderr": "oss://gene-test/cromwell_test/GATK4_VariantDiscovery_pipeline_hg38/53cfd3fc-e9d5-4431-83ec-be6c51ab9365/call-HaplotypeCaller/shard-10/stderr",
"shardIndex": 10,
"stdout": "oss://gene-test/cromwell_test/GATK4_VariantDiscovery_pipeline_hg38/53cfd3fc-e9d5-4431-83ec-be6c51ab9365/call-HaplotypeCaller/shard-10/stdout",
"outputs": {
"output_vcf": "oss://gene-test/cromwell_test/GATK4_VariantDiscovery_pipeline_hg38/53cfd3fc-e9d5-4431-83ec-be6c51ab9365/call-HaplotypeCaller/shard-10/NA12878.hg38.vcf.gz",
"output_vcf_index": "oss://gene-test/cromwell_test/GATK4_VariantDiscovery_pipeline_hg38/53cfd3fc-e9d5-4431-83ec-be6c51ab9365/call-HaplotypeCaller/shard-10/NA12878.hg38.vcf.gz.tbi"
},
"commandLine": "set -e\n\n /gatk/gatk --java-options \"-Xmx4g -Xmx4g\" \\\n HaplotypeCaller \\\n -R /cromwell_inputs/73a7571e/Homo_sapiens_assembly38.fasta \\\n -I /cromwell_inputs/02f1b5ca/NA12878.hg38.ready.bam.bam \\\n -L /home/data/GATK_human_genome_resource_bundle/hg38_from_GCP/hg38_wgs_scattered_calling_intervals/temp_0047_of_50/scattered.interval_list \\\n -O NA12878.hg38.vcf.gz \\\n -contamination 0",
"attempt": 1,
"jobId": "job-000000005DB051A800006F970001CAC8",
"start": "2019-10-25T02:38:03.522Z",
"backendStatus": "Finished",
"runtimeAttributes": {
"cluster": "Right(AutoClusterConfiguration(OnDemand,ecs.sn2ne.xlarge,img-ubuntu-vpc,None,None,None))",
"continueOnReturnCode": "0",
"failOnStderr": "false",
"vpc": "BcsVpcConfiguration(Some(10.20.200.0/24),Some(vpc-uf61zj30k0ebuen0xi7ci))",
"mounts": "BcsInputMount(Right(nas://10.20.66.4:/data/ali_yun_test/),Left(/home/data),true)",
"docker": "BcsDockerWithoutPath(registry.cn-shanghai.aliyuncs.com/wgs_poc/poc:4.0.10.1)",
"autoReleaseJob": "false",
"maxRetries": "0"
},
"executionStatus": "Done",
"end": "2019-10-25T03:22:23.481Z",
"executionEvents": [
{
"endTime": "2019-10-25T03:22:21.626Z",
"description": "RunningJob",
"startTime": "2019-10-25T02:38:03.645Z"
},
{
"endTime": "2019-10-25T03:22:22.481Z",
"description": "UpdatingCallCache",
"startTime": "2019-10-25T03:22:21.626Z"
},
{
"endTime": "2019-10-25T02:38:03.645Z",
"description": "CallCacheReading",
"startTime": "2019-10-25T02:38:03.643Z"
},
{
"endTime": "2019-10-25T02:38:03.522Z",
"description": "Pending",
"startTime": "2019-10-25T02:38:03.522Z"
},
{
"endTime": "2019-10-25T02:38:03.542Z",
"description": "WaitingForValueStore",
"startTime": "2019-10-25T02:38:03.542Z"
},
{
"endTime": "2019-10-25T03:22:23.481Z",
"description": "UpdatingJobStore",
"startTime": "2019-10-25T03:22:22.481Z"
},
{
"endTime": "2019-10-25T02:38:03.643Z",
"description": "PreparingJob",
"startTime": "2019-10-25T02:38:03.542Z"
},
{
"endTime": "2019-10-25T02:38:03.542Z",
"description": "RequestingExecutionToken",
"startTime": "2019-10-25T02:38:03.522Z"
}
],
"backend": "BCS"
}
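在上面的元数据中,有一项 callCaching,主要记录了如下信息:
- allowResultReuse:本次 call 的结果是否允许被后续的 call 复用。
- hashes:本次 call 各项要素的哈希值,包括输入(input、input count)、输出表达式(output expression、output count)、运行时属性(runtime attribute)、命令模板(command template)以及后端名称(backend name)。
- effectiveCallCachingMode:本次 call 实际生效的 Call Caching 模式,例如 ReadAndWriteCache 表示既读取缓存也写入缓存。
- hit:本次 call 是否命中缓存。
- result:本次缓存查找的结果说明,例如 Cache Miss 表示未命中。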
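综合上面的解释,我们看到示例中的这个 call 是 GATK4_VariantDiscovery_pipeline_hg38 这个工作流的 HaplotypeCaller 这个 task 的 10 号 shard,其 Call Caching 情况如下:
- effectiveCallCachingMode 为 ReadAndWriteCache,即本次执行既尝试读取缓存,也会写入缓存。
- hit 为 false,result 为 Cache Miss,即本次执行没有命中缓存,作业被实际执行。
- allowResultReuse 为 true,即本次执行的结果可以被后续的 call 复用。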
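如果遇到 Call Caching 结果不符合预期的 task(例如预期命中缓存却没有命中),可以通过如下步骤排查原因:
1. 分别查询本次工作流和预期被复用的那次工作流的 metadata,找到对应 task 的 call。
2. 确认两次 call 的 callCaching 信息:被复用的 call 的 allowResultReuse 应为 true,本次 call 的 effectiveCallCachingMode 应包含读取缓存。
3. 逐项对比两次 call 的 hashes,找出哈希值不一致的项(如输入文件、docker 镜像、命令模板等),不一致的项即为缓存未命中的原因。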
华为云CDN在中国大陆拥有2000+加速节点,在中国大陆境外拥有500+加速节点。 父主...
租用服务器 稳定运行的注意事项 服务器租用 是网站建设常用方式之一,很多站长认...
先从testerHome上关于测试平台的话题谈起,再来谈谈接口测试的痛点是什么,然后...
简介:StreamNative 联合创始人翟佳在本次演讲中介绍了下一代云原生消息流平台 A...
TOP云 (west.cn)12月14日消息,最新一期的DN榜刚刚出炉,本期榜单上多个数字 ...
哈喽各位同学们大家好呀 今天小编为大家分享开发者学院中课程“Spring Boot 2.5...
洛杉矶—2021 年 3 月 11 日—互联网名称与数字地址分配机构 (Internet Corporat...
你有没有想过? 电话那头声音甜美、对答如流的小姐姐 可能是个机器人。 这并不是...
阿里妹导读: 大数据与现有的科技手段结合,对大多数产业而言都能产生巨大的经济...
倒计时7天,阿里云开发者大会即将拉开帷幕。 据说这次大会上有很多神秘的“声音...