machine.json 只需准备一次即可重复使用,其写法与之前 init 和 autotest 步骤中的相同,此处不再解释。
{
  "api_version": "1.0",
  "deepmd_version": "2.1.5",
  "train": [
    {
      "command": "dp",
      "machine": {
        "batch_type": "Shell",
        "context_type": "LazyLocalContext",
        "local_root": "./",
        "remote_root": "./"
      },
      "resources": {
        "number_node": 1,
        "cpu_per_node": 12,
        "gpu_per_node": 1,
        "strategy": {
          "if_cuda_multi_devices": true
        },
        "queue_name": "zw",
        "group_size": 4,
        "source_list": [
          "/home/zxg/BeCu/conda_env/deepmd"
        ]
      }
    }
  ],
  "model_devi": [
    {
      "command": "lmp",
      "machine": {
        "batch_type": "Shell",
        "context_type": "LazyLocalContext",
        "local_root": "./",
        "remote_root": "./"
      },
      "resources": {
        "number_node": 1,
        "cpu_per_node": 20,
        "gpu_per_node": 1,
        "strategy": {
          "if_cuda_multi_devices": true
        },
        "queue_name": "zw",
        "group_size": 3500,
        "source_list": [
          "/home/zxg/BeCu/conda_env/deepmd"
        ]
      }
    }
  ],
  "fp": [
    {
      "command": "OMP_NUM_THREADS=1 mpirun -n 12 abacus",
      "machine": {
        "batch_type": "Shell",
        "context_type": "LazyLocalContext",
        "local_root": "./",
        "remote_root": "./",
        "clean_asynchronously": false
      },
      "resources": {
        "number_node": 1,
        "cpu_per_node": 12,
        "gpu_per_node": 1,
        "group_size": 500,
        "para_deg": 1,
        "source_list": [
          "/home/zxg/BeCu/conda_env/abacus"
        ]
      }
    }
  ]
}
作者:朱雪刚,邮箱:xuegangzhu@qq.com,工作单位:石家庄学院理学院/北京科学智能研究院(AISI)访问学者(2023.07-2024.09),访问导师:北京大学陈默涵;徐张满仓,邮箱:xuzhangmancang@dp.tech