diff --git a/configs/setr/README.md b/configs/setr/README.md index 5673d9b63066699fb7a308261a6d071950d9a319..3a28635e9803c47769685941eef99629d41506bc 100644 --- a/configs/setr/README.md +++ b/configs/setr/README.md @@ -36,6 +36,23 @@ This head has two version head. } ``` +## Usage + +You can download the pretrained model from [here](https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_large_p16_384-b3be5167.pth). Then you can convert its keys with the script `vit2mmseg.py` in the `tools/model_converters` directory. + +```shell +python tools/model_converters/vit2mmseg.py ${PRETRAIN_PATH} ${STORE_PATH} +``` + +E.g. + +```shell +python tools/model_converters/vit2mmseg.py \ +jx_vit_large_p16_384-b3be5167.pth pretrain/vit_large_p16.pth +``` + +This script converts the model from `PRETRAIN_PATH` and stores the converted model in `STORE_PATH`. + ## Results and models ### ADE20K diff --git a/configs/setr/setr_mla_512x512_160k_b8_ade20k.py b/configs/setr/setr_mla_512x512_160k_b8_ade20k.py index 6977dbacc1b50a2e6748ec4bcd936abeca164eda..e1a07ce5a38b36bcadb673e5b09c1d40b865ad7b 100644 --- a/configs/setr/setr_mla_512x512_160k_b8_ade20k.py +++ b/configs/setr/setr_mla_512x512_160k_b8_ade20k.py @@ -8,7 +8,8 @@ model = dict( backbone=dict( img_size=(512, 512), drop_rate=0., - init_cfg=dict(type='Pretrained', checkpoint='mmcls://vit_large_p16')), + init_cfg=dict( + type='Pretrained', checkpoint='pretrain/vit_large_p16.pth')), decode_head=dict(num_classes=150), auxiliary_head=[ dict( diff --git a/configs/setr/setr_naive_512x512_160k_b16_ade20k.py b/configs/setr/setr_naive_512x512_160k_b16_ade20k.py index 3b1f9d7d3bf6df2c6f4496df5fd0f733f9d7ddda..8ad8c9fe293628672d81a8b4532d28de4d141b94 100644 --- a/configs/setr/setr_naive_512x512_160k_b16_ade20k.py +++ b/configs/setr/setr_naive_512x512_160k_b16_ade20k.py @@ -8,7 +8,8 @@ model = dict( backbone=dict( img_size=(512, 512), drop_rate=0., - init_cfg=dict(type='Pretrained', checkpoint='mmcls://vit_large_p16')), + init_cfg=dict( + 
type='Pretrained', checkpoint='pretrain/vit_large_p16.pth')), decode_head=dict(num_classes=150), auxiliary_head=[ dict( diff --git a/configs/setr/setr_pup_512x512_160k_b16_ade20k.py b/configs/setr/setr_pup_512x512_160k_b16_ade20k.py index 68c3a2a4eea8ea1adbb1bda4de93216a3835edf7..83997a2bfedbaf97e6fb67e536175f7fbe38b1e6 100644 --- a/configs/setr/setr_pup_512x512_160k_b16_ade20k.py +++ b/configs/setr/setr_pup_512x512_160k_b16_ade20k.py @@ -8,7 +8,8 @@ model = dict( backbone=dict( img_size=(512, 512), drop_rate=0., - init_cfg=dict(type='Pretrained', checkpoint='mmcls://vit_large_p16')), + init_cfg=dict( + type='Pretrained', checkpoint='pretrain/vit_large_p16.pth')), decode_head=dict(num_classes=150), auxiliary_head=[ dict( diff --git a/configs/setr/setr_vit-large_mla_8x1_768x768_80k_cityscapes.py b/configs/setr/setr_vit-large_mla_8x1_768x768_80k_cityscapes.py index 3c2fc3af72c7f1a34e5a8b27bcdb25ca59892b7f..4237cd5aa7b4e84f7e74cf63641b6602d2b05a38 100644 --- a/configs/setr/setr_vit-large_mla_8x1_768x768_80k_cityscapes.py +++ b/configs/setr/setr_vit-large_mla_8x1_768x768_80k_cityscapes.py @@ -6,7 +6,8 @@ model = dict( pretrained=None, backbone=dict( drop_rate=0, - init_cfg=dict(type='Pretrained', checkpoint='mmcls://vit_large_p16')), + init_cfg=dict( + type='Pretrained', checkpoint='pretrain/vit_large_p16.pth')), test_cfg=dict(mode='slide', crop_size=(768, 768), stride=(512, 512))) optimizer = dict( diff --git a/configs/setr/setr_vit-large_naive_8x1_768x768_80k_cityscapes.py b/configs/setr/setr_vit-large_naive_8x1_768x768_80k_cityscapes.py index 181f444ef15651b48d0c14a54d48228863e04853..0c6621ef141f2b7906629b5f2ff1081d46991c39 100644 --- a/configs/setr/setr_vit-large_naive_8x1_768x768_80k_cityscapes.py +++ b/configs/setr/setr_vit-large_naive_8x1_768x768_80k_cityscapes.py @@ -7,7 +7,8 @@ model = dict( pretrained=None, backbone=dict( drop_rate=0., - init_cfg=dict(type='Pretrained', checkpoint='mmcls://vit_large_p16')), + init_cfg=dict( + type='Pretrained', 
checkpoint='pretrain/vit_large_p16.pth')), test_cfg=dict(mode='slide', crop_size=(768, 768), stride=(512, 512))) optimizer = dict( diff --git a/configs/setr/setr_vit-large_pup_8x1_768x768_80k_cityscapes.py b/configs/setr/setr_vit-large_pup_8x1_768x768_80k_cityscapes.py index 817a0296e3276b3b732f3fddfc70463218005a58..e108988a0ba06cb7a5852745f7cc6ffb0e6470eb 100644 --- a/configs/setr/setr_vit-large_pup_8x1_768x768_80k_cityscapes.py +++ b/configs/setr/setr_vit-large_pup_8x1_768x768_80k_cityscapes.py @@ -9,7 +9,8 @@ model = dict( pretrained=None, backbone=dict( drop_rate=0., - init_cfg=dict(type='Pretrained', checkpoint='mmcls://vit_large_p16')), + init_cfg=dict( + type='Pretrained', checkpoint='pretrain/vit_large_p16.pth')), auxiliary_head=[ dict( type='SETRUPHead',