Spaces:
Runtime error
Runtime error
Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .coveragerc +36 -0
- .dockerignore +19 -0
- .flake8 +9 -0
- .flake8.other +9 -0
- .flake8.speech +9 -0
- .gitattributes +88 -0
- .github/CODEOWNERS +4 -0
- .github/ISSUE_TEMPLATE/bug_report.md +42 -0
- .github/ISSUE_TEMPLATE/config.yml +2 -0
- .github/ISSUE_TEMPLATE/dev_container_bug_report.md +35 -0
- .github/ISSUE_TEMPLATE/feature_request.md +25 -0
- .github/PULL_REQUEST_TEMPLATE.md +57 -0
- .github/actions/cancel-workflow/action.yml +25 -0
- .github/actions/test-template/action.yml +227 -0
- .github/labeler.yml +55 -0
- .github/scripts/__init__.py +0 -0
- .github/scripts/components_to_run.py +84 -0
- .github/scripts/nemo_dependencies.py +400 -0
- .github/scripts/notify.py +79 -0
- .github/workflows/_build_container.yml +89 -0
- .github/workflows/_bump_mcore_tag.yml +56 -0
- .github/workflows/build-test-publish-wheel.yml +38 -0
- .github/workflows/changelog-build.yml +123 -0
- .github/workflows/cherry-pick-release-commit.yml +14 -0
- .github/workflows/cicd-approve-test-queue.yml +175 -0
- .github/workflows/cicd-main-nemo2.yml +299 -0
- .github/workflows/cicd-main-speech.yml +216 -0
- .github/workflows/cicd-main-testcopy.yml +472 -0
- .github/workflows/cicd-main-unit-tests.yml +212 -0
- .github/workflows/cicd-main.yml +450 -0
- .github/workflows/cicd-relabel-bot.yml +36 -0
- .github/workflows/close-inactive-issue-pr.yml +25 -0
- .github/workflows/code-formatting.yml +73 -0
- .github/workflows/code-init-file-checker.yml +23 -0
- .github/workflows/code-linting.yml +160 -0
- .github/workflows/codeql.yml +75 -0
- .github/workflows/community-bot.yml +15 -0
- .github/workflows/config/changelog-config.json +134 -0
- .github/workflows/config/codeql.yml +9 -0
- .github/workflows/copyright-check.yml +22 -0
- .github/workflows/gh-docs.yml +81 -0
- .github/workflows/install-test.yml +286 -0
- .github/workflows/labeler.yml +14 -0
- .github/workflows/mcore-tag-bump-bot.yml +62 -0
- .github/workflows/monitor-single-vm.yml +54 -0
- .github/workflows/monitor-vms.yml +54 -0
- .github/workflows/release-freeze.yml +85 -0
- .github/workflows/release.yml +48 -0
- .github/workflows/secrets-detector.yml +43 -0
- .github/workflows/update-buildcache.yml +110 -0
.coveragerc
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[run]
|
| 2 |
+
concurrency = thread,multiprocessing
|
| 3 |
+
omit =
|
| 4 |
+
/tmp/*
|
| 5 |
+
/home/TestData/*
|
| 6 |
+
/workspace/Megatron-LM/*
|
| 7 |
+
nemo/collections/multimodal/*
|
| 8 |
+
nemo/collections/multimodal_autoregressive/*
|
| 9 |
+
nemo/collections/vision/*
|
| 10 |
+
nemo/collections/diffusion/*
|
| 11 |
+
nemo/collections/nlp/*
|
| 12 |
+
|
| 13 |
+
nemo/collections/asr/*
|
| 14 |
+
nemo/collections/speechlm/*
|
| 15 |
+
nemo/collections/tts/*
|
| 16 |
+
|
| 17 |
+
# omit from audio
|
| 18 |
+
nemo/collections/audio/data/data_simulation.py
|
| 19 |
+
nemo/collections/audio/metrics/squim.py
|
| 20 |
+
nemo/collections/audio/losses/maxine/*
|
| 21 |
+
nemo/collections/audio/models/maxine/*
|
| 22 |
+
nemo/collections/audio/parts/utils/maxine.py
|
| 23 |
+
|
| 24 |
+
nemo/core/*
|
| 25 |
+
nemo/collections/common/*
|
| 26 |
+
|
| 27 |
+
/workspace/config-3.12.py
|
| 28 |
+
/workspace/config-3.py
|
| 29 |
+
/workspace/config.py
|
| 30 |
+
|
| 31 |
+
[paths]
|
| 32 |
+
source =
|
| 33 |
+
nemo/
|
| 34 |
+
/home/runner/work/NeMo/NeMo/nemo
|
| 35 |
+
/workspace/nemo
|
| 36 |
+
|
.dockerignore
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
__pycache__
|
| 2 |
+
*.pyc
|
| 3 |
+
*.pyo
|
| 4 |
+
*.pyd
|
| 5 |
+
.Python
|
| 6 |
+
env
|
| 7 |
+
pip-log.txt
|
| 8 |
+
pip-delete-this-directory.txt
|
| 9 |
+
.tox
|
| 10 |
+
.coverage
|
| 11 |
+
.coverage.*
|
| 12 |
+
.cache
|
| 13 |
+
nosetests.xml
|
| 14 |
+
coverage.xml
|
| 15 |
+
*,cover
|
| 16 |
+
*.log
|
| 17 |
+
.git
|
| 18 |
+
**/*.nemo
|
| 19 |
+
**/*.ckpt
|
.flake8
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[flake8]
|
| 2 |
+
max-line-length = 119
|
| 3 |
+
select =
|
| 4 |
+
F541, # f-string without any placeholders
|
| 5 |
+
F841, # local variable 'x' is assigned to but never used
|
| 6 |
+
F401, # 'x' imported but unused
|
| 7 |
+
E741, # ambiguous variable name 'l'
|
| 8 |
+
F821, # undefined name 'x'
|
| 9 |
+
E266, # too many leading '#' for block comment
|
.flake8.other
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[flake8]
|
| 2 |
+
max-line-length = 119
|
| 3 |
+
select =
|
| 4 |
+
F541, # f-string without any placeholders
|
| 5 |
+
F841, # local variable 'x' is assigned to but never used
|
| 6 |
+
F401, # 'x' imported but unused
|
| 7 |
+
E741, # ambiguous variable name 'l'
|
| 8 |
+
F821, # undefined name 'x'
|
| 9 |
+
E266, # too many leading '#' for block comment
|
.flake8.speech
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[flake8]
|
| 2 |
+
max-line-length = 119
|
| 3 |
+
select =
|
| 4 |
+
F541, # f-string without any placeholders
|
| 5 |
+
F841, # local variable 'x' is assigned to but never used
|
| 6 |
+
F401, # 'x' imported but unused
|
| 7 |
+
E741, # ambiguous variable name 'l'
|
| 8 |
+
F821, # undefined name 'x'
|
| 9 |
+
E266, # too many leading '#' for block comment
|
.gitattributes
CHANGED
|
@@ -33,3 +33,91 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
docs/source/asr/images/citrinet_vertical.png filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
docs/source/asr/images/conf-ensembles-overview.png filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
docs/source/asr/images/conformer_ctc.png filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
docs/source/asr/images/hat.png filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
docs/source/asr/images/hybrid_asr_tts_model.png filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
docs/source/asr/images/jasper_vertical.png filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
docs/source/asr/images/quartz_vertical.png filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
docs/source/asr/images/squeezeformer.png filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
docs/source/asr/speaker_diarization/images/aosc_3spk_example.gif filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
docs/source/asr/speaker_diarization/images/asr_sd_diagram.png filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
docs/source/asr/speaker_diarization/images/ats.png filter=lfs diff=lfs merge=lfs -text
|
| 47 |
+
docs/source/asr/speaker_diarization/images/cache_fifo_chunk.png filter=lfs diff=lfs merge=lfs -text
|
| 48 |
+
docs/source/asr/speaker_diarization/images/data_flow.png filter=lfs diff=lfs merge=lfs -text
|
| 49 |
+
docs/source/asr/speaker_diarization/images/e2e_and_cascaded_diar_systems.png filter=lfs diff=lfs merge=lfs -text
|
| 50 |
+
docs/source/asr/speaker_diarization/images/intro_comparison.png filter=lfs diff=lfs merge=lfs -text
|
| 51 |
+
docs/source/asr/speaker_diarization/images/loss_types.png filter=lfs diff=lfs merge=lfs -text
|
| 52 |
+
docs/source/asr/speaker_diarization/images/main_dataflow.png filter=lfs diff=lfs merge=lfs -text
|
| 53 |
+
docs/source/asr/speaker_diarization/images/ms_trade_off.png filter=lfs diff=lfs merge=lfs -text
|
| 54 |
+
docs/source/asr/speaker_diarization/images/msdd_train_and_infer.png filter=lfs diff=lfs merge=lfs -text
|
| 55 |
+
docs/source/asr/speaker_diarization/images/scale_weight_cnn.png filter=lfs diff=lfs merge=lfs -text
|
| 56 |
+
docs/source/asr/speaker_diarization/images/sortformer.png filter=lfs diff=lfs merge=lfs -text
|
| 57 |
+
docs/source/asr/speaker_diarization/images/streaming_steps.png filter=lfs diff=lfs merge=lfs -text
|
| 58 |
+
docs/source/asr/speaker_diarization/images/weighted_sum.png filter=lfs diff=lfs merge=lfs -text
|
| 59 |
+
docs/source/asr/speaker_recognition/images/ICASPP_SpeakerNet.png filter=lfs diff=lfs merge=lfs -text
|
| 60 |
+
docs/source/asr/speaker_recognition/images/titanet_network.png filter=lfs diff=lfs merge=lfs -text
|
| 61 |
+
docs/source/asr/speech_classification/images/marblenet_vertical.png filter=lfs diff=lfs merge=lfs -text
|
| 62 |
+
docs/source/asr/speech_classification/images/matchboxnet_vertical.png filter=lfs diff=lfs merge=lfs -text
|
| 63 |
+
docs/source/asr/speech_intent_slot/images/example.png filter=lfs diff=lfs merge=lfs -text
|
| 64 |
+
docs/source/core/whyntypes.gif filter=lfs diff=lfs merge=lfs -text
|
| 65 |
+
docs/source/nlp/entity_linking_overview.jpg filter=lfs diff=lfs merge=lfs -text
|
| 66 |
+
docs/source/nlp/nemo_megatron/customization_forward.png filter=lfs diff=lfs merge=lfs -text
|
| 67 |
+
docs/source/nlp/nemo_megatron/customization_module.png filter=lfs diff=lfs merge=lfs -text
|
| 68 |
+
docs/source/nlp/nemo_megatron/images/ddp.gif filter=lfs diff=lfs merge=lfs -text
|
| 69 |
+
docs/source/nlp/nemo_megatron/images/pnom.gif filter=lfs diff=lfs merge=lfs -text
|
| 70 |
+
docs/source/nlp/nemo_megatron/images/pp.gif filter=lfs diff=lfs merge=lfs -text
|
| 71 |
+
docs/source/nlp/nemo_megatron/images/pp_comm_overlap.png filter=lfs diff=lfs merge=lfs -text
|
| 72 |
+
docs/source/nlp/nemo_megatron/images/tp1.png filter=lfs diff=lfs merge=lfs -text
|
| 73 |
+
docs/source/nlp/nemo_megatron/images/tp2.png filter=lfs diff=lfs merge=lfs -text
|
| 74 |
+
docs/source/nlp/nemo_megatron/images/tp_comm_overlap.png filter=lfs diff=lfs merge=lfs -text
|
| 75 |
+
docs/source/tools/images/scrsh_2.png filter=lfs diff=lfs merge=lfs -text
|
| 76 |
+
docs/source/tools/images/scrsh_9.png filter=lfs diff=lfs merge=lfs -text
|
| 77 |
+
docs/source/tools/images/sde_mls_player.png filter=lfs diff=lfs merge=lfs -text
|
| 78 |
+
docs/source/tools/images/sde_player.png filter=lfs diff=lfs merge=lfs -text
|
| 79 |
+
docs/source/tools/images/sde_samples.png filter=lfs diff=lfs merge=lfs -text
|
| 80 |
+
docs/source/tts/images/audiocodec_model.png filter=lfs diff=lfs merge=lfs -text
|
| 81 |
+
docs/source/tts/images/data_labeling_pipeline.png filter=lfs diff=lfs merge=lfs -text
|
| 82 |
+
docs/source/tts/images/fastpitch_model.png filter=lfs diff=lfs merge=lfs -text
|
| 83 |
+
docs/source/tts/images/hifigan_d_model.png filter=lfs diff=lfs merge=lfs -text
|
| 84 |
+
docs/source/tts/images/hifigan_g_model.png filter=lfs diff=lfs merge=lfs -text
|
| 85 |
+
docs/source/tts/images/mixertts_model.png filter=lfs diff=lfs merge=lfs -text
|
| 86 |
+
docs/source/tts/images/radaligner_model.png filter=lfs diff=lfs merge=lfs -text
|
| 87 |
+
docs/source/tts/images/radtts_model.png filter=lfs diff=lfs merge=lfs -text
|
| 88 |
+
docs/source/tts/images/tacotron2_model.png filter=lfs diff=lfs merge=lfs -text
|
| 89 |
+
docs/source/tts/images/univnet_model.png filter=lfs diff=lfs merge=lfs -text
|
| 90 |
+
docs/source/tts/images/waveglow_model.png filter=lfs diff=lfs merge=lfs -text
|
| 91 |
+
nemo/collections/diffusion/assets/mixed_training.png filter=lfs diff=lfs merge=lfs -text
|
| 92 |
+
nemo/collections/diffusion/assets/pipeline_conditioning.png filter=lfs diff=lfs merge=lfs -text
|
| 93 |
+
nemo/collections/diffusion/assets/st_dit_hybrid_parallel.png filter=lfs diff=lfs merge=lfs -text
|
| 94 |
+
output_audio_context.wav filter=lfs diff=lfs merge=lfs -text
|
| 95 |
+
output_baked.wav filter=lfs diff=lfs merge=lfs -text
|
| 96 |
+
tools/speech_data_explorer/screenshot.png filter=lfs diff=lfs merge=lfs -text
|
| 97 |
+
tools/speech_data_simulator/pictures/audio_session.png filter=lfs diff=lfs merge=lfs -text
|
| 98 |
+
tutorials/asr/images/canary2_timestamps.png filter=lfs diff=lfs merge=lfs -text
|
| 99 |
+
tutorials/asr/images/multi_instance.png filter=lfs diff=lfs merge=lfs -text
|
| 100 |
+
tutorials/asr/images/multilang_asr_inference.png filter=lfs diff=lfs merge=lfs -text
|
| 101 |
+
tutorials/asr/images/multilang_asr_train.png filter=lfs diff=lfs merge=lfs -text
|
| 102 |
+
tutorials/asr/images/promptformat.png filter=lfs diff=lfs merge=lfs -text
|
| 103 |
+
tutorials/asr/images/speaker_injection.png filter=lfs diff=lfs merge=lfs -text
|
| 104 |
+
tutorials/asr/images/test_wer_wandb.png filter=lfs diff=lfs merge=lfs -text
|
| 105 |
+
tutorials/asr/images/tokenizer.png filter=lfs diff=lfs merge=lfs -text
|
| 106 |
+
tutorials/llm/llama/domain-adaptive-pretraining/code/imgs/tokenization_diagram.png filter=lfs diff=lfs merge=lfs -text
|
| 107 |
+
tutorials/llm/qwen/pruning-distillation/imgs/val_loss_comparison.png filter=lfs diff=lfs merge=lfs -text
|
| 108 |
+
tutorials/speaker_tasks/images/affinity_matrix_fusion.png filter=lfs diff=lfs merge=lfs -text
|
| 109 |
+
tutorials/speaker_tasks/images/ats.png filter=lfs diff=lfs merge=lfs -text
|
| 110 |
+
tutorials/speaker_tasks/images/cache_fifo_chunk.png filter=lfs diff=lfs merge=lfs -text
|
| 111 |
+
tutorials/speaker_tasks/images/cascaded_diar_diagram.png filter=lfs diff=lfs merge=lfs -text
|
| 112 |
+
tutorials/speaker_tasks/images/intro_comparison.png filter=lfs diff=lfs merge=lfs -text
|
| 113 |
+
tutorials/speaker_tasks/images/loss_types.png filter=lfs diff=lfs merge=lfs -text
|
| 114 |
+
tutorials/speaker_tasks/images/main_dataflow.png filter=lfs diff=lfs merge=lfs -text
|
| 115 |
+
tutorials/speaker_tasks/images/msdd_inputs.png filter=lfs diff=lfs merge=lfs -text
|
| 116 |
+
tutorials/speaker_tasks/images/msdd_output_loss.png filter=lfs diff=lfs merge=lfs -text
|
| 117 |
+
tutorials/speaker_tasks/images/msdd_train_and_infer.png filter=lfs diff=lfs merge=lfs -text
|
| 118 |
+
tutorials/speaker_tasks/images/multiscale_example.png filter=lfs diff=lfs merge=lfs -text
|
| 119 |
+
tutorials/speaker_tasks/images/sortformer.png filter=lfs diff=lfs merge=lfs -text
|
| 120 |
+
tutorials/speaker_tasks/images/streaming_steps.png filter=lfs diff=lfs merge=lfs -text
|
| 121 |
+
tutorials/tts/audio_samples/new_dict_entry.wav filter=lfs diff=lfs merge=lfs -text
|
| 122 |
+
tutorials/tts/audio_samples/phonemes_as_input.wav filter=lfs diff=lfs merge=lfs -text
|
| 123 |
+
tutorials/tts/images/tacotron2_diagram.png filter=lfs diff=lfs merge=lfs -text
|
.github/CODEOWNERS
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.github/ @pablo-garay @ko3n1g @thomasdhc @chtruong814
|
| 2 |
+
docker/Dockerfile.ci @pablo-garay @ko3n1g @thomasdhc @chtruong814
|
| 3 |
+
.pylintrc.* @pablo-garay @ko3n1g @thomasdhc @chtruong814
|
| 4 |
+
.flake8.* @pablo-garay @ko3n1g @thomasdhc @chtruong814
|
.github/ISSUE_TEMPLATE/bug_report.md
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
name: Bug report
|
| 3 |
+
about: Create a report to help us improve
|
| 4 |
+
title: ''
|
| 5 |
+
labels: bug
|
| 6 |
+
assignees: ''
|
| 7 |
+
|
| 8 |
+
---
|
| 9 |
+
|
| 10 |
+
**Describe the bug**
|
| 11 |
+
|
| 12 |
+
A clear and concise description of what the bug is.
|
| 13 |
+
|
| 14 |
+
**Steps/Code to reproduce bug**
|
| 15 |
+
|
| 16 |
+
Please list *minimal* steps or code snippet for us to be able to reproduce the bug.
|
| 17 |
+
|
| 18 |
+
A helpful guide on how to craft a minimal bug report: http://matthewrocklin.com/blog/work/2018/02/28/minimal-bug-reports.
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
**Expected behavior**
|
| 22 |
+
|
| 23 |
+
A clear and concise description of what you expected to happen.
|
| 24 |
+
|
| 25 |
+
**Environment overview (please complete the following information)**
|
| 26 |
+
|
| 27 |
+
- Environment location: [Bare-metal, Docker, Cloud (specify cloud provider - AWS, Azure, GCP, Colab)]
|
| 28 |
+
- Method of NeMo install: [pip install or from source]. Please specify exact commands you used to install.
|
| 29 |
+
- If method of install is [Docker], provide `docker pull` & `docker run` commands used
|
| 30 |
+
|
| 31 |
+
**Environment details**
|
| 32 |
+
|
| 33 |
+
If NVIDIA docker image is used you don't need to specify these.
|
| 34 |
+
Otherwise, please provide:
|
| 35 |
+
- OS version
|
| 36 |
+
- PyTorch version
|
| 37 |
+
- Python version
|
| 38 |
+
|
| 39 |
+
**Additional context**
|
| 40 |
+
|
| 41 |
+
Add any other context about the problem here.
|
| 42 |
+
Example: GPU model
|
.github/ISSUE_TEMPLATE/config.yml
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
blank_issues_enabled: false
|
| 2 |
+
|
.github/ISSUE_TEMPLATE/dev_container_bug_report.md
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
container pulled on date: mm/dd/yyyy
|
| 3 |
+
name: Dev container - Bug report
|
| 4 |
+
about: Create a report to help us improve
|
| 5 |
+
title: ''
|
| 6 |
+
labels: bug
|
| 7 |
+
assignees: ''
|
| 8 |
+
|
| 9 |
+
---
|
| 10 |
+
|
| 11 |
+
**Describe the bug**
|
| 12 |
+
|
| 13 |
+
A clear and concise description of what the bug is.
|
| 14 |
+
|
| 15 |
+
**Steps/Code to reproduce bug**
|
| 16 |
+
|
| 17 |
+
Please list *minimal* steps or code snippet for us to be able to reproduce the bug.
|
| 18 |
+
|
| 19 |
+
A helpful guide on how to craft a minimal bug report: http://matthewrocklin.com/blog/work/2018/02/28/minimal-bug-reports.
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
**Expected behavior**
|
| 23 |
+
|
| 24 |
+
A clear and concise description of what you expected to happen.
|
| 25 |
+
|
| 26 |
+
**Environment overview (please complete the following information)**
|
| 27 |
+
|
| 28 |
+
- Environment location: Docker
|
| 29 |
+
- Method of install: Please specify exact commands you used to install.
|
| 30 |
+
- If method of install is [Docker], provide `docker pull` & `docker run` commands used
|
| 31 |
+
|
| 32 |
+
**Additional context**
|
| 33 |
+
|
| 34 |
+
Add any other context about the problem here.
|
| 35 |
+
Example: GPU model
|
.github/ISSUE_TEMPLATE/feature_request.md
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
name: Feature request
|
| 3 |
+
about: Suggest an idea for this project
|
| 4 |
+
title: ''
|
| 5 |
+
labels: feature request
|
| 6 |
+
assignees: okuchaiev
|
| 7 |
+
|
| 8 |
+
---
|
| 9 |
+
|
| 10 |
+
**Is your feature request related to a problem? Please describe.**
|
| 11 |
+
|
| 12 |
+
A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
|
| 13 |
+
|
| 14 |
+
**Describe the solution you'd like**
|
| 15 |
+
|
| 16 |
+
A clear and concise description of what you want to happen.
|
| 17 |
+
Provide a code snippet on how new APIs/changes would be used by others.
|
| 18 |
+
|
| 19 |
+
**Describe alternatives you've considered**
|
| 20 |
+
|
| 21 |
+
A clear and concise description of any alternative solutions or features you've considered.
|
| 22 |
+
|
| 23 |
+
**Additional context**
|
| 24 |
+
|
| 25 |
+
Add any other context or screenshots about the feature request here.
|
.github/PULL_REQUEST_TEMPLATE.md
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
> [!IMPORTANT]
|
| 2 |
+
> The `Update branch` button must only be pressed on very rare occasions.
|
| 3 |
+
> An outdated branch is never blocking the merge of a PR.
|
| 4 |
+
> Please reach out to the automation team before pressing that button.
|
| 5 |
+
|
| 6 |
+
# What does this PR do ?
|
| 7 |
+
|
| 8 |
+
Add a one line overview of what this PR aims to accomplish.
|
| 9 |
+
|
| 10 |
+
**Collection**: [Note which collection this PR will affect]
|
| 11 |
+
|
| 12 |
+
# Changelog
|
| 13 |
+
|
| 14 |
+
- Add specific line by line info of high level changes in this PR.
|
| 15 |
+
|
| 16 |
+
# Usage
|
| 17 |
+
|
| 18 |
+
- You can potentially add a usage example below
|
| 19 |
+
|
| 20 |
+
```python
|
| 21 |
+
# Add a code snippet demonstrating how to use this
|
| 22 |
+
```
|
| 23 |
+
|
| 24 |
+
# GitHub Actions CI
|
| 25 |
+
|
| 26 |
+
The Jenkins CI system has been replaced by GitHub Actions self-hosted runners.
|
| 27 |
+
|
| 28 |
+
The GitHub Actions CI will run automatically when the "Run CICD" label is added to the PR.
|
| 29 |
+
To re-run CI remove and add the label again.
|
| 30 |
+
To run CI on an untrusted fork, a NeMo user with write access must first click "Approve and run".
|
| 31 |
+
|
| 32 |
+
# Before your PR is "Ready for review"
|
| 33 |
+
|
| 34 |
+
**Pre checks**:
|
| 35 |
+
|
| 36 |
+
- [ ] Make sure you read and followed [Contributor guidelines](https://github.com/NVIDIA/NeMo/blob/main/CONTRIBUTING.md)
|
| 37 |
+
- [ ] Did you write any new necessary tests?
|
| 38 |
+
- [ ] Did you add or update any necessary documentation?
|
| 39 |
+
- [ ] Does the PR affect components that are optional to install? (Ex: Numba, Pynini, Apex etc)
|
| 40 |
+
- [ ] Reviewer: Does the PR have correct import guards for all optional libraries?
|
| 41 |
+
|
| 42 |
+
**PR Type**:
|
| 43 |
+
|
| 44 |
+
- [ ] New Feature
|
| 45 |
+
- [ ] Bugfix
|
| 46 |
+
- [ ] Documentation
|
| 47 |
+
|
| 48 |
+
If you haven't finished some of the above items, you can still open a "Draft" PR.
|
| 49 |
+
|
| 50 |
+
## Who can review?
|
| 51 |
+
|
| 52 |
+
Anyone in the community is free to review the PR once the checks have passed.
|
| 53 |
+
[Contributor guidelines](https://github.com/NVIDIA/NeMo/blob/main/CONTRIBUTING.md) lists the specific people who can review PRs in various areas.
|
| 54 |
+
|
| 55 |
+
# Additional Information
|
| 56 |
+
|
| 57 |
+
- Related to # (issue)
|
.github/actions/cancel-workflow/action.yml
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: Cancel Workflow
|
| 2 |
+
description: >
|
| 3 |
+
Cancels the current workflow run, i.e. all jobs. Useful if you want to cancel the rest of the workflow when one job
|
| 4 |
+
fails. Note that this will cause the workflow to appear cancelled, not failed.
|
| 5 |
+
|
| 6 |
+
# Cancelling the workflow in a post-script (like this:
|
| 7 |
+
# https://docs.github.com/en/actions/creating-actions/metadata-syntax-for-github-actions#runspost; can also be done with
|
| 8 |
+
# this action: https://github.com/webiny/action-post-run, see Git history of this file) wouldn't help the status, it
|
| 9 |
+
# would still be cancelled. It actually indeed is, but it would be nicer to set it to failed, but there seems to be no
|
| 10 |
+
# way to do this.
|
| 11 |
+
|
| 12 |
+
runs:
|
| 13 |
+
using: "composite"
|
| 14 |
+
steps:
|
| 15 |
+
- name: Cancel Workflow
|
| 16 |
+
# # Fork PRs won't have a token with write access to Actions, thus won't be able to cancel the workflow.
|
| 17 |
+
# if: github.event.pull_request == '' || github.event.pull_request.head.repo.fork == false
|
| 18 |
+
shell: bash
|
| 19 |
+
run: |
|
| 20 |
+
curl --verbose \
|
| 21 |
+
-X POST \
|
| 22 |
+
-H "Accept: application/vnd.github+json" \
|
| 23 |
+
-H "Authorization: Bearer ${{ github.token }}" \
|
| 24 |
+
-H "X-GitHub-Api-Version: 2022-11-28" \
|
| 25 |
+
https://api.github.com/repos/${{ github.repository }}/actions/runs/${{ github.run_id }}/cancel
|
.github/actions/test-template/action.yml
ADDED
|
@@ -0,0 +1,227 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2025, NVIDIA CORPORATION.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
name: "Test Template"
|
| 15 |
+
description: "Template for running NeMo tests in a containerized environment"
|
| 16 |
+
|
| 17 |
+
inputs:
|
| 18 |
+
runner:
|
| 19 |
+
description: "Runner to use for test"
|
| 20 |
+
required: true
|
| 21 |
+
timeout:
|
| 22 |
+
description: "Max runtime of test in minutes"
|
| 23 |
+
required: false
|
| 24 |
+
default: "10"
|
| 25 |
+
script:
|
| 26 |
+
description: "Test script to execute"
|
| 27 |
+
required: true
|
| 28 |
+
after_script:
|
| 29 |
+
description: "Script to run after main test"
|
| 30 |
+
required: false
|
| 31 |
+
default: ":"
|
| 32 |
+
is_optional:
|
| 33 |
+
description: "If set to true, a test failure is reported as a warning and the test is displayed as successful"
|
| 34 |
+
required: false
|
| 35 |
+
default: "false"
|
| 36 |
+
is_unit_test:
|
| 37 |
+
description: "Upload coverage as unit test"
|
| 38 |
+
required: false
|
| 39 |
+
default: "false"
|
| 40 |
+
tests_to_run:
|
| 41 |
+
description: "Tests to run"
|
| 42 |
+
required: false
|
| 43 |
+
default: '["all"]'
|
| 44 |
+
image:
|
| 45 |
+
description: "Image to use for test"
|
| 46 |
+
required: false
|
| 47 |
+
default: "nemo_container"
|
| 48 |
+
cpu-only:
|
| 49 |
+
description: "Run tests on CPU only"
|
| 50 |
+
required: false
|
| 51 |
+
default: "false"
|
| 52 |
+
runs:
|
| 53 |
+
using: "composite"
|
| 54 |
+
steps:
|
| 55 |
+
- name: Noop
|
| 56 |
+
shell: bash
|
| 57 |
+
run: |
|
| 58 |
+
chmod -R u+rwX ${{ github.run_id }}
|
| 59 |
+
echo "noop"
|
| 60 |
+
|
| 61 |
+
- name: Docker system cleanup
|
| 62 |
+
shell: bash
|
| 63 |
+
run: |
|
| 64 |
+
docker system prune -af --filter "until=24h" --filter "label!=nemo.pr_number=${{ github.event.pull_request.number || 0 }}" --force || true
|
| 65 |
+
|
| 66 |
+
- name: Docker pull image
|
| 67 |
+
shell: bash
|
| 68 |
+
run: |
|
| 69 |
+
docker pull nemoci.azurecr.io/${{ inputs.image }}:${{ github.run_id }}
|
| 70 |
+
|
| 71 |
+
- name: Clean repos
|
| 72 |
+
shell: bash
|
| 73 |
+
run: |
|
| 74 |
+
|
| 75 |
+
- name: Create UUID
|
| 76 |
+
id: uuid
|
| 77 |
+
shell: bash
|
| 78 |
+
run: |
|
| 79 |
+
echo "id=$(uuidgen)" >> "$GITHUB_OUTPUT"
|
| 80 |
+
|
| 81 |
+
- name: Checkout NeMo
|
| 82 |
+
uses: actions/checkout@v4
|
| 83 |
+
env:
|
| 84 |
+
DIR: ${{ github.run_id }}
|
| 85 |
+
with:
|
| 86 |
+
path: ${{ github.run_id }}/${{steps.uuid.outputs.id }}/NeMo
|
| 87 |
+
|
| 88 |
+
- name: Start container
|
| 89 |
+
shell: bash
|
| 90 |
+
env:
|
| 91 |
+
DIR: ${{ github.run_id }}
|
| 92 |
+
run: |
|
| 93 |
+
mkdir -p $DIR
|
| 94 |
+
|
| 95 |
+
# Map of runner names to GPU device configurations
|
| 96 |
+
declare -A GPU_CONFIGS=(
|
| 97 |
+
["myVm-01"]="0,1"
|
| 98 |
+
["myVm-02"]="2,3"
|
| 99 |
+
["myVm-03"]="4,5"
|
| 100 |
+
["myVm-04"]="6,7"
|
| 101 |
+
)
|
| 102 |
+
|
| 103 |
+
ARG=("")
|
| 104 |
+
if [[ "${{ inputs.cpu-only }}" == "false" ]]; then
|
| 105 |
+
ARG=("--runtime=nvidia --gpus all")
|
| 106 |
+
fi
|
| 107 |
+
|
| 108 |
+
cmd=$(cat <<RUN_TEST_EOF
|
| 109 |
+
#!/bin/bash
|
| 110 |
+
docker container rm -f nemo_container_${{ github.run_id }}_${{ inputs.runner }} || true
|
| 111 |
+
docker run \
|
| 112 |
+
--rm \
|
| 113 |
+
-d \
|
| 114 |
+
--name nemo_container_${{ github.run_id }}_${{ inputs.runner }} ${ARG[@]} \
|
| 115 |
+
--shm-size=64g \
|
| 116 |
+
--env TRANSFORMERS_OFFLINE=0 \
|
| 117 |
+
--env HYDRA_FULL_ERROR=1 \
|
| 118 |
+
--env HF_HOME=/home/TestData/HF_HOME \
|
| 119 |
+
--env RUN_ID=${{ github.run_id }} \
|
| 120 |
+
--volume $(pwd)/${{ github.run_id }}/${{steps.uuid.outputs.id }}/NeMo:/workspace \
|
| 121 |
+
--volume /mnt/datadrive/TestData:/home/TestData nemoci.azurecr.io/${{ inputs.image }}:${{ github.run_id }} \
|
| 122 |
+
bash -c "sleep $(( ${{ inputs.timeout }} * 60 + 60 ))"
|
| 123 |
+
RUN_TEST_EOF
|
| 124 |
+
)
|
| 125 |
+
|
| 126 |
+
echo "$cmd" | tee "$DIR/retry_job.sh"
|
| 127 |
+
bash $DIR/retry_job.sh
|
| 128 |
+
|
| 129 |
+
- name: Create run-script
|
| 130 |
+
id: create
|
| 131 |
+
env:
|
| 132 |
+
DIR: ${{ github.run_id }}
|
| 133 |
+
shell: bash
|
| 134 |
+
run: |
|
| 135 |
+
COVERAGE_PREFIX=$([[ "${{ inputs.is_unit_test }}" == "true" ]] && echo "unit-test" || echo "e2e")
|
| 136 |
+
echo "coverage-prefix=$COVERAGE_PREFIX" | tee -a "$GITHUB_OUTPUT"
|
| 137 |
+
|
| 138 |
+
mkdir -p $DIR
|
| 139 |
+
rm $DIR/.coverage || true
|
| 140 |
+
rm $DIR/err.log || true
|
| 141 |
+
|
| 142 |
+
cmd=$(cat <<RUN_TEST_EOF
|
| 143 |
+
#!/bin/bash
|
| 144 |
+
|
| 145 |
+
(
|
| 146 |
+
set -e
|
| 147 |
+
|
| 148 |
+
docker exec -t nemo_container_${{ github.run_id }}_${{ inputs.runner }} bash -c '\
|
| 149 |
+
cp -r /opt/Megatron-LM/ /workspace/ && \
|
| 150 |
+
bash tests/functional_tests/${{ inputs.script }}.sh && \
|
| 151 |
+
echo "Finished successfully." || echo "Did not finish."'
|
| 152 |
+
) 2>&1 | tee $DIR/err.log
|
| 153 |
+
|
| 154 |
+
RUN_TEST_EOF
|
| 155 |
+
)
|
| 156 |
+
|
| 157 |
+
echo "timeout_in_seconds=$(( ${{ inputs.timeout }} * 60 ))" | tee -a "$GITHUB_OUTPUT"
|
| 158 |
+
echo "$cmd" | tee "$DIR/job.sh"
|
| 159 |
+
|
| 160 |
+
- name: Run main script
|
| 161 |
+
uses: nick-fields/retry@v3
|
| 162 |
+
with:
|
| 163 |
+
timeout_seconds: ${{ steps.create.outputs.timeout_in_seconds }}
|
| 164 |
+
max_attempts: 3
|
| 165 |
+
shell: bash
|
| 166 |
+
retry_on: timeout
|
| 167 |
+
command: /bin/bash ${{ github.run_id }}/job.sh
|
| 168 |
+
on_retry_command: /bin/bash ${{ github.run_id }}/retry_job.sh
|
| 169 |
+
|
| 170 |
+
- name: Check result
|
| 171 |
+
id: check
|
| 172 |
+
shell: bash
|
| 173 |
+
env:
|
| 174 |
+
DIR: ${{ github.run_id }}
|
| 175 |
+
run: |
|
| 176 |
+
cat $DIR/err.log
|
| 177 |
+
|
| 178 |
+
log=$(tail -c 2000 $DIR/err.log | base64 -w 0)
|
| 179 |
+
echo "log=$log" >> "$GITHUB_OUTPUT"
|
| 180 |
+
|
| 181 |
+
potential_infra_failure=$(cat $DIR/err.log | grep -Eqiw "device" && echo true || echo false)
|
| 182 |
+
echo "potential_infra_failure=$potential_infra_failure" >> "$GITHUB_OUTPUT"
|
| 183 |
+
|
| 184 |
+
docker exec nemo_container_${{ github.run_id }}_${{ inputs.runner }} coverage combine
|
| 185 |
+
docker exec nemo_container_${{ github.run_id }}_${{ inputs.runner }} coverage xml
|
| 186 |
+
docker cp nemo_container_${{ github.run_id }}_${{ inputs.runner }}:/workspace/.coverage $DIR/.coverage
|
| 187 |
+
docker cp nemo_container_${{ github.run_id }}_${{ inputs.runner }}:/workspace/coverage.xml $DIR/coverage.xml
|
| 188 |
+
|
| 189 |
+
coverage_report=coverage-${{ steps.create.outputs.coverage-prefix }}-${{ github.run_id }}-$(uuidgen)
|
| 190 |
+
echo "coverage_report=$coverage_report" >> "$GITHUB_OUTPUT"
|
| 191 |
+
|
| 192 |
+
IS_SUCCESS=$(tail -n 1 $DIR/err.log | grep -q "Finished successfully." && echo "true" || echo "false")
|
| 193 |
+
|
| 194 |
+
if [[ "$IS_SUCCESS" == "false" && "${{ inputs.is_optional }}" == "true" ]]; then
|
| 195 |
+
echo "::warning:: Test failed, but displayed as successful because it is marked as optional."
|
| 196 |
+
IS_SUCCESS=true
|
| 197 |
+
fi
|
| 198 |
+
|
| 199 |
+
if [[ "$IS_SUCCESS" == "false" ]]; then
|
| 200 |
+
echo Test did not finish successfully.
|
| 201 |
+
exit 1
|
| 202 |
+
fi
|
| 203 |
+
|
| 204 |
+
exit $EXIT_CODE
|
| 205 |
+
|
| 206 |
+
- name: Test coverage
|
| 207 |
+
shell: bash -x -e -u -o pipefail {0}
|
| 208 |
+
run: |
|
| 209 |
+
docker exec -t nemo_container_${{ github.run_id }}_${{ inputs.runner }} coverage report -i
|
| 210 |
+
|
| 211 |
+
- name: Upload artifacts
|
| 212 |
+
uses: actions/upload-artifact@v4
|
| 213 |
+
if: ${{ steps.check.outputs.coverage_report != 'none' }}
|
| 214 |
+
with:
|
| 215 |
+
name: ${{ steps.check.outputs.coverage_report }}
|
| 216 |
+
path: |
|
| 217 |
+
${{ github.run_id }}/coverage.xml
|
| 218 |
+
${{ github.run_id }}/.coverage
|
| 219 |
+
include-hidden-files: true
|
| 220 |
+
|
| 221 |
+
- name: Container shutdown
|
| 222 |
+
if: always()
|
| 223 |
+
shell: bash
|
| 224 |
+
run: |
|
| 225 |
+
docker exec nemo_container_${{ github.run_id }}_${{ inputs.runner }} bash -c "chown -R $(id -u):$(id -g) /workspace"
|
| 226 |
+
rm -rf $(pwd)/${{ github.run_id }}/${{steps.uuid.outputs.id }} || true
|
| 227 |
+
docker container rm -f nemo_container_${{ github.run_id }}_${{ inputs.runner }} || true
|
.github/labeler.yml
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
ASR:
|
| 2 |
+
- nemo/collections/asr/**/*
|
| 3 |
+
- examples/asr/**/*
|
| 4 |
+
- tutorials/asr/**/*
|
| 5 |
+
- docs/source/asr/**/*
|
| 6 |
+
- tests/collections/asr/**
|
| 7 |
+
|
| 8 |
+
NLP:
|
| 9 |
+
- nemo/collections/nlp/**/*
|
| 10 |
+
- examples/nlp/**/*
|
| 11 |
+
- tutorials/nlp/**/*
|
| 12 |
+
- docs/source/nlp/**/*
|
| 13 |
+
- tests/collections/nlp/**
|
| 14 |
+
|
| 15 |
+
Multi Modal:
|
| 16 |
+
- nemo/collections/multimodal/**/*
|
| 17 |
+
- examples/multimodal/**/*
|
| 18 |
+
- tutorials/multimodal/**/*
|
| 19 |
+
- docs/source/multimodal/**/*
|
| 20 |
+
- tests/collections/multimodal/**
|
| 21 |
+
|
| 22 |
+
Speaker Tasks:
|
| 23 |
+
- examples/speaker_tasks/**/*
|
| 24 |
+
- tutorials/speaker_tasks/**/*
|
| 25 |
+
|
| 26 |
+
TTS:
|
| 27 |
+
- nemo/collections/tts/**/*
|
| 28 |
+
- nemo/collections/common/tokenizers/text_to_speech/**
|
| 29 |
+
- examples/tts/**/*
|
| 30 |
+
- tutorials/tts/**/*
|
| 31 |
+
- docs/source/tts/**/*
|
| 32 |
+
- scripts/dataset_processing/tts/**
|
| 33 |
+
- scripts/tts_dataset_files/**
|
| 34 |
+
- tests/collections/tts/**
|
| 35 |
+
- tests/collections/common/tokenizers/text_to_speech/**
|
| 36 |
+
|
| 37 |
+
Audio:
|
| 38 |
+
- nemo/collections/audio/**/*
|
| 39 |
+
- examples/audio/**/*
|
| 40 |
+
- tutorials/audio/**/*
|
| 41 |
+
- docs/source/audio/**/*
|
| 42 |
+
- tests/collections/audio/**
|
| 43 |
+
|
| 44 |
+
core:
|
| 45 |
+
- nemo/core/**/*
|
| 46 |
+
- tests/core/**
|
| 47 |
+
|
| 48 |
+
common:
|
| 49 |
+
- nemo/collections/common/**/*
|
| 50 |
+
|
| 51 |
+
CI:
|
| 52 |
+
- .github/**/*
|
| 53 |
+
- Jenkinsfile
|
| 54 |
+
- Dockerfile
|
| 55 |
+
- ci.groovy
|
.github/scripts/__init__.py
ADDED
|
File without changes
|
.github/scripts/components_to_run.py
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
|
| 15 |
+
#!/usr/bin/env python3
|
| 16 |
+
import json
|
| 17 |
+
import os
|
| 18 |
+
import sys
|
| 19 |
+
from typing import Any, Dict, List, Set
|
| 20 |
+
|
| 21 |
+
import click
|
| 22 |
+
import git
|
| 23 |
+
|
| 24 |
+
import nemo_dependencies
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def get_changed_files(source_sha: str, target_sha: str) -> List[str]:
    """
    List the files that differ between two commits.

    The repository is opened two directory levels above this file and the
    diff is taken from ``target_sha`` to ``source_sha``.

    Args:
        source_sha: Source commit SHA.
        target_sha: Target commit SHA.

    Returns:
        The paths touched by the diff, without duplicates. When the two sides
        of a diff entry carry different paths (for example a rename), both the
        old and the new path are reported so that either one can be matched
        against a path-keyed lookup.

    Any error is printed to stderr and terminates the process with exit
    status 1.
    """
    try:
        # Initialize the repo object - go up two levels from this file's location
        repo = git.Repo(os.path.join(os.path.dirname(__file__), "..", ".."))

        # Get the diff between target and source
        diff_index = repo.commit(target_sha).diff(repo.commit(source_sha))

        # A dict keeps insertion order while dropping duplicate paths.
        changed_files = {}
        for diff in diff_index:
            for path in (diff.a_path, diff.b_path):
                if path:
                    changed_files[path] = None

        return list(changed_files)

    except git.exc.GitCommandError as e:
        print(f"Error fetching changelog: {e}", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        print(f"Unexpected error: {e}", file=sys.stderr)
        sys.exit(1)
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
@click.command()
@click.option('--source-sha', type=str, required=True, help='Source commit SHA')
@click.option('--target-sha', type=str, required=True, help='Target commit sha')
def main(source_sha: str, target_sha: str):
    """
    Print the files changed between two commits and write the test modules
    they affect to test_modules.json.
    """

    # Output unique changed files
    print("\nChanged files:")
    changed_files = get_changed_files(source_sha, target_sha)

    print(json.dumps(sorted(changed_files), indent=2))

    nemo_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
    # Build dependency graph (maps a file path to the test modules it affects)
    dependencies = nemo_dependencies.build_dependency_graph(nemo_root)

    # Collect the union of affected modules; files unknown to the graph are ignored.
    affected = set()
    for changed_file in changed_files:
        affected.update(dependencies.get(changed_file, ()))

    # Sorted so the generated file is identical from run to run.
    test_modules: List[str] = sorted(affected)

    with open("test_modules.json", "w", encoding="utf-8") as f:
        json.dump(test_modules, f)


if __name__ == "__main__":
    main()
|
.github/scripts/nemo_dependencies.py
ADDED
|
@@ -0,0 +1,400 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
+
# you may not use this file except in compliance with the License.
|
| 6 |
+
# You may obtain a copy of the License at
|
| 7 |
+
#
|
| 8 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 9 |
+
#
|
| 10 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 11 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 12 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 13 |
+
# See the License for the specific language governing permissions and
|
| 14 |
+
# limitations under the License.
|
| 15 |
+
|
| 16 |
+
"""
|
| 17 |
+
NeMo dependency structure definition.
|
| 18 |
+
This module analyzes the codebase to determine internal dependencies between NeMo collections and core components.
|
| 19 |
+
"""
|
| 20 |
+
|
| 21 |
+
import ast
|
| 22 |
+
import json
|
| 23 |
+
import os
|
| 24 |
+
from typing import Dict, List, Set
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def find_python_files(directory: str) -> List[str]:
    """Collect every ``.py`` file below the nemo, scripts, examples and tests subtrees of ``directory``.

    Subtrees that do not exist are skipped. The returned paths are built by
    joining ``directory`` with the walked locations.
    """
    collected: List[str] = []

    for subtree in ('nemo', 'scripts', 'examples', 'tests'):
        base = os.path.join(directory, subtree)
        if not os.path.exists(base):
            continue
        for current_dir, _subdirs, filenames in os.walk(base):
            collected.extend(
                os.path.join(current_dir, filename) for filename in filenames if filename.endswith('.py')
            )

    return collected
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def analyze_imports(nemo_root: str, file_path: str) -> Set[str]:
    """Analyze a Python file and return its NeMo package dependencies using AST parsing.

    Only ``from nemo.<package>... import name`` statements are considered;
    star imports are skipped. A module is taken into account when its second
    path component is ``collections`` (and the module is deeper than
    ``nemo.collections`` itself) or one of the names returned by
    ``find_top_level_packages``.

    Each imported name is resolved through the ``__init__.py`` of the module
    it is imported from: if that file re-exports the name from another
    ``nemo.`` module, the re-export target is reported; otherwise
    ``"<module>.<name>"`` is reported.

    Args:
        nemo_root: Repository root; module paths are resolved below it.
        file_path: Python file to analyze.

    Returns:
        Set of dotted import targets. Errors while reading or parsing are
        printed and yield whatever was collected up to that point.
    """
    imports: Set[str] = set()
    # Resolved re-export maps per package directory. Every name imported from
    # the same package must see the same map, so results are memoized.
    init_cache: Dict[str, Dict[str, str]] = {}
    # Package directories currently being resolved; guards against import cycles.
    in_progress: Set[str] = set()
    # Filled on first use so the directory scan happens at most once per file.
    top_level_packages = None

    def get_init_imports(module_path: str, depth: int = 0) -> Dict[str, str]:
        """Recursively analyze imports from __init__.py files and map them to their final destinations."""
        cached = init_cache.get(module_path)
        if cached is not None:
            return cached

        # Prevent infinite recursion
        if depth > 10 or module_path in in_progress:  # Limit depth to 10 levels
            return {}

        init_path = os.path.join(module_path, '__init__.py')
        if not os.path.exists(init_path):
            init_cache[module_path] = {}
            return init_cache[module_path]

        in_progress.add(module_path)
        import_map: Dict[str, str] = {}
        try:
            with open(init_path, 'r', encoding='utf-8') as f:
                init_tree = ast.parse(f.read(), filename=init_path)

            for node in ast.walk(init_tree):
                if not (isinstance(node, ast.ImportFrom) and node.module and node.module.startswith('nemo.')):
                    continue

                # Get the full module path for the import
                module_parts = node.module.split('.')
                module_dir = os.path.join(nemo_root, *module_parts)
                is_package = os.path.exists(os.path.join(module_dir, '__init__.py'))

                for name in node.names:
                    if name.name == '*':
                        continue

                    if is_package:
                        # If the imported module has an __init__.py, recursively analyze it
                        sub_imports = get_init_imports(module_dir, depth + 1)
                        if name.name in sub_imports:
                            import_map[name.name] = sub_imports[name.name]
                        else:
                            # If not found in sub-imports, it might be from the module itself
                            module_file = os.path.join(module_dir, f"{module_parts[-1]}.py")
                            if os.path.exists(module_file):
                                import_map[name.name] = f"{node.module}.{name.name}"
                    else:
                        # Direct module import
                        import_map[name.name] = f"{node.module}.{name.name}"
        except Exception as e:
            print(f"Error analyzing {init_path}: {e}")
            import_map = {}
        finally:
            in_progress.discard(module_path)

        init_cache[module_path] = import_map
        return import_map

    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            tree = ast.parse(f.read(), filename=file_path)

        for node in ast.walk(tree):
            if not (isinstance(node, ast.ImportFrom) and node.module and node.module.startswith('nemo.')):
                continue

            # Split the module path; the 'nemo.' prefix guarantees at least two parts
            parts = node.module.split('.')
            module_type = parts[1]

            if module_type == 'collections':
                # A bare ``from nemo.collections import x`` names no concrete collection
                if len(parts) == 2:
                    continue
            else:
                if top_level_packages is None:
                    top_level_packages = set(find_top_level_packages(nemo_root))
                if module_type not in top_level_packages:
                    continue

            module_path = os.path.join(nemo_root, *parts)
            for name in node.names:
                if name.name == '*':
                    continue

                # Prefer the re-export target from the package's __init__.py
                init_imports = get_init_imports(module_path)
                imports.add(init_imports.get(name.name, f"{node.module}.{name.name}"))

    except Exception as e:
        print(f"Error analyzing {file_path}: {e}")

    return imports
|
| 148 |
+
|
| 149 |
+
|
| 150 |
+
def find_top_level_packages(nemo_root: str) -> List[str]:
    """Find the top-level package directories under ``nemo/`` and ``tests/``.

    Args:
        nemo_root: Repository root containing the ``nemo`` and ``tests`` directories.

    Returns:
        Sorted, duplicate-free names of the immediate sub-directories of
        ``nemo/`` and ``tests/`` whose name does not start with ``__``.
        An empty list (after printing a warning) if either directory is missing.
    """
    nemo_dir = os.path.join(nemo_root, 'nemo')
    tests_dir = os.path.join(nemo_root, 'tests')

    if not os.path.exists(nemo_dir):
        print(f"Warning: nemo directory not found at {nemo_dir}")
        return []
    if not os.path.exists(tests_dir):
        print(f"Warning: tests directory not found at {tests_dir}")
        return []

    packages: Set[str] = set()
    for base_dir in (nemo_dir, tests_dir):
        for item in os.listdir(base_dir):
            # Each entry must be checked against the directory it was listed from.
            if not item.startswith('__') and os.path.isdir(os.path.join(base_dir, item)):
                packages.add(item)

    return sorted(packages)
|
| 169 |
+
|
| 170 |
+
|
| 171 |
+
def find_collection_modules(nemo_root: str) -> Dict[str, List[str]]:
    """Return a ``nemo.collections.<name>`` key (with an empty list) for each collection directory.

    Directories whose name starts with ``__`` and plain files are ignored.
    If ``nemo/collections`` does not exist a warning is printed and an empty
    dict is returned.
    """
    collections_dir = os.path.join(nemo_root, 'nemo', 'collections')

    if not os.path.exists(collections_dir):
        print(f"Warning: collections directory not found at {collections_dir}")
        return {}

    return {
        f"nemo.collections.{entry}": []
        for entry in os.listdir(collections_dir)
        if os.path.isdir(os.path.join(collections_dir, entry)) and not entry.startswith('__')
    }
|
| 186 |
+
|
| 187 |
+
|
| 188 |
+
# Substrings that route a dependency into the "speech" bucket.
_SPEECH_MARKERS = (
    "nemo.collections.asr",
    "nemo.collections.tts",
    "nemo.collections.speechlm",
    "nemo.collections.audio",
    "tests.collections.asr",
    "tests.collections.tts",
    "tests.collections.speechlm",
    "tests.collections.audio",
)
# Substrings that route a dependency into the "export-deploy" bucket.
_EXPORT_DEPLOY_MARKERS = ("nemo.export", "nemo.deploy", "tests.export", "tests.deploy")
# Substrings that route a dependency into the "automodel" bucket.
_AUTOMODEL_MARKERS = (
    "nemo.collections.llm",
    "nemo.collections.vlm",
    "nemo.automodel",
    "tests.collections.llm",
    "tests.collections.vlm",
    "tests.automodel",
)
# Buckets triggered by files that affect every test suite.
_ALL_BUCKETS = ("nemo2", "unit-tests", "speech", "automodel", "export-deploy")
# (filename substring, buckets) pairs; evaluated in order, the last match wins.
_FILENAME_BUCKETS = (
    ("cicd-main-export-deploy", ("export-deploy",)),
    ("cicd-main-nemo2", ("nemo2",)),
    ("cicd-main-speech", ("speech",)),
    ("cicd-main-automodel", ("automodel",)),
    ("cicd-main-unit-tests", ("unit-tests",)),
    ("Dockerfile", _ALL_BUCKETS),
)


def _transitive_closure(graph: Dict[str, List[str]]) -> Dict[str, List[str]]:
    """Return, for every key of ``graph``, all nodes reachable by following one or more edges."""
    closure: Dict[str, List[str]] = {}
    for node, direct in graph.items():
        reachable = dict.fromkeys(direct)  # insertion-ordered set
        pending = list(direct)
        while pending:
            current = pending.pop()
            for successor in graph.get(current, ()):
                if successor not in reachable:
                    reachable[successor] = None
                    pending.append(successor)
        closure[node] = list(reachable)
    return closure


def _bucket_labels(dep: str) -> Set[str]:
    """Map one simplified dependency name to the CI buckets it triggers."""
    labels: Set[str] = set()
    is_speech = any(marker in dep for marker in _SPEECH_MARKERS)

    if is_speech:
        labels.update(("speech", "unit-tests"))
    if any(marker in dep for marker in _EXPORT_DEPLOY_MARKERS):
        labels.update(("export-deploy", "unit-tests"))
    if any(marker in dep for marker in _AUTOMODEL_MARKERS):
        labels.update(("automodel", "unit-tests"))
    if "tests" in dep and "tests.functional_tests" not in dep:
        labels.add("unit-tests")
    # Every non-speech collection belongs to the nemo2 bucket.
    if "nemo.collections" in dep and not is_speech:
        labels.update(("nemo2", "unit-tests"))

    return labels


def build_dependency_graph(nemo_root: str) -> Dict[str, List[str]]:
    """Build a dependency graph by analyzing all Python files.

    Steps:
      1. Collect the ``nemo.*`` imports of every module under ``nemo/``;
         every module under ``tests/`` is recorded as depending on itself.
      2. Reverse the graph (import target -> modules that import it) and
         extend it with transitive dependents.
      3. Re-key the graph by repository-relative file or directory path and
         shorten the dependents to top-level package / collection names.
      4. Replace the dependents by CI bucket names ("speech", "nemo2",
         "automodel", "export-deploy", "unit-tests").
      5. Add entries for files under ``requirements/``, Dockerfiles and the
         ``cicd-main-*`` workflow files.

    Args:
        nemo_root: Repository root. All filesystem checks are made relative to
            it, so the result does not depend on the current working directory.

    Returns:
        Mapping of repository-relative path to the sorted bucket names it
        triggers, ordered by descending number of buckets.
    """
    # Find all top-level packages (scanned once and reused below)
    top_level_packages = find_top_level_packages(nemo_root)
    print(f"Found top-level packages: {top_level_packages}")
    top_level_lookup = set(top_level_packages)

    dependencies: Dict[str, List[str]] = {}

    for file_path in find_python_files(nemo_root):
        relative_path = os.path.relpath(file_path, nemo_root)
        parts = relative_path.split(os.sep)

        if len(parts) == 1 or (parts[0] != "nemo" and parts[0] != "tests"):
            continue

        # Strip only the file extension, then turn the path into a dotted name.
        module_path = os.path.splitext(relative_path)[0].replace(os.sep, ".")
        if parts[0] == 'tests':
            # Test modules are not parsed; each one maps to itself.
            dependencies[module_path] = [module_path]
        elif parts[1] == 'collections' or parts[1] in top_level_lookup:
            dependencies[module_path] = list(set(analyze_imports(nemo_root, file_path)))

    # Flip the dependency graph to show reverse dependencies
    reverse_dependencies: Dict[str, List[str]] = {}
    for package, deps in dependencies.items():
        for dep in deps:
            reverse_dependencies.setdefault(dep, []).append(package)

    # Follow and extend records with transitive dependencies
    dependencies = _transitive_closure(reverse_dependencies)

    # Simplify values: Either top-level package or collection module
    collection_modules = find_collection_modules(nemo_root)
    simplified_dependencies: Dict[str, List[str]] = {}
    for package, deps in dependencies.items():
        package_parts = package.split('.')
        parent_rel = os.path.join(*package_parts[:-1]) if len(package_parts) > 1 else ""

        if package_parts[0] == "tests":
            simplified_package_path = f"{os.path.join(*package_parts)}.py"
        elif parent_rel and os.path.isfile(os.path.join(nemo_root, f"{parent_rel}.py")):
            # The last component is a name defined inside a module file.
            simplified_package_path = f"{parent_rel}.py"
        elif parent_rel and os.path.isdir(os.path.join(nemo_root, parent_rel)):
            simplified_package_path = parent_rel
        else:
            simplified_package_path = package

        # Several packages can share one path, so accumulate into the existing entry.
        entries = list(simplified_dependencies.get(simplified_package_path, []))
        for dep in deps:
            dep_parts = dep.split('.')

            if len(dep_parts) >= 2 and dep_parts[1] in top_level_lookup and dep_parts[1] != 'collections':
                entries.append(f"{dep_parts[0]}.{dep_parts[1]}")
            elif dep_parts[0] == "tests":
                entries.append(".".join(dep_parts))
            elif (
                len(dep_parts) >= 3
                and (simplified_name := f"nemo.{dep_parts[1]}.{dep_parts[2]}") in collection_modules
            ):
                entries.append(simplified_name)

        entries.append(package)
        simplified_dependencies[simplified_package_path] = sorted(set(entries))
    dependencies = simplified_dependencies

    # Bucket
    bucket_deps: Dict[str, List[str]] = {}
    for package, deps in dependencies.items():
        labels: Set[str] = set()
        for dep in deps:
            labels |= _bucket_labels(dep)
        bucket_deps[package] = sorted(labels)
    dependencies = bucket_deps

    # Additional dependencies
    # Add all files in requirements/ directory
    requirements_dir = os.path.join(nemo_root, "requirements")
    if os.path.isdir(requirements_dir):
        for filename in os.listdir(requirements_dir):
            dependencies[os.path.join("requirements", filename)] = list(_ALL_BUCKETS)

    # Add all Dockerfile files and the per-suite workflow files
    for root, _, files in os.walk(nemo_root):
        for filename in files:
            relative_path = os.path.relpath(os.path.join(root, filename), nemo_root)
            for marker, buckets in _FILENAME_BUCKETS:
                if marker in filename:
                    dependencies[relative_path] = list(buckets)

    # Sort dependencies by length of values (number of dependencies)
    dependencies = dict(sorted(dependencies.items(), key=lambda x: len(x[1]), reverse=True))

    return dependencies
|
| 382 |
+
|
| 383 |
+
|
| 384 |
+
def main():
    """Build the dependency graph of the enclosing checkout and save it as ``nemo_dependencies.json``."""
    # The project root sits three directory levels above this script.
    project_root = os.path.abspath(__file__)
    for _ in range(3):
        project_root = os.path.dirname(project_root)

    graph = build_dependency_graph(project_root)

    # Serialize first, then write the finished text in one go.
    serialized = json.dumps(graph, indent=4)
    with open('nemo_dependencies.json', 'w', encoding='utf-8') as out_file:
        out_file.write(serialized)


if __name__ == "__main__":
    main()
|
.github/scripts/notify.py
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2025, NVIDIA CORPORATION.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
import os
|
| 15 |
+
|
| 16 |
+
import requests
|
| 17 |
+
from github import Github
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def send_slack_notification():
    """Post a Slack message listing the failed jobs of a GitHub Actions workflow run.

    Configuration is read from environment variables:
      * ``GH_TOKEN``      - token passed to the GitHub client.
      * ``SLACK_WEBHOOK`` - webhook URL the message is posted to (required).
      * ``REPOSITORY``    - repository name passed to ``get_repo`` (required).
      * ``RUN_ID``        - numeric id of the workflow run (required).
      * ``SERVER_URL``    - base URL for links, default ``https://github.com``.
      * ``PR_NUMBER``     - pull request number; unset, empty or ``0`` means
        the run is reported against ``BRANCH_NAME`` instead of a PR.
      * ``BRANCH_NAME``   - branch shown when there is no pull request.

    Raises:
        RuntimeError: If a required environment variable is unset or empty.
        requests.HTTPError: If the webhook answers with an error status.
    """
    # Get environment variables
    gh_token = os.environ.get('GH_TOKEN')
    webhook_url = os.environ.get('SLACK_WEBHOOK')
    repository = os.environ.get('REPOSITORY')
    run_id = os.environ.get('RUN_ID')
    server_url = os.environ.get('SERVER_URL', 'https://github.com')
    # An empty string (variable set but blank) is treated like "no PR".
    pr_number = int(os.environ.get('PR_NUMBER') or 0)
    branch_name = os.environ.get('BRANCH_NAME')

    required = {'SLACK_WEBHOOK': webhook_url, 'REPOSITORY': repository, 'RUN_ID': run_id}
    missing = [name for name, value in required.items() if not value]
    if missing:
        raise RuntimeError(f"Missing required environment variables: {', '.join(missing)}")

    # Get failure info from GitHub API
    gh = Github(gh_token)
    repo = gh.get_repo(repository)

    # Get failed jobs
    failed_jobs = [job.name for job in repo.get_workflow_run(int(run_id)).jobs() if job.conclusion == 'failure']

    if pr_number != 0:
        pr = repo.get_pull(pr_number)

        # Backticks are removed from the PR title before it is embedded in the message.
        title = f"*<{server_url}/{repository}/pull/{pr_number}|PR#{pr_number}>: {pr.title.replace('`', '')}*"
        author = f"<{server_url}/{pr.user.login}|{pr.user.login}>"
        branch = f"<{server_url}/{pr.head.repo.full_name}/tree/{pr.head.ref}|{pr.head.ref}>"
    else:
        title = f"*Run on <{server_url}/{repository}/tree/{branch_name}|{branch_name}>*"
        author = "No author"
        branch = f"<{server_url}/{repository}/tree/{branch_name}|{branch_name}>"

    # Only the last '/'-separated segment of a job name is shown; the
    # 'Nemo_CICD_Test' job is left out of the list.
    failed_job_lines = [
        f" • <{server_url}/{repository}/actions/runs/{run_id}|{job.split('/')[-1]}>"
        for job in failed_jobs
        if job.split('/')[-1] != 'Nemo_CICD_Test'
    ]

    blocks = [
        {
            "type": "section",
            "text": {
                "type": "mrkdwn",
                "text": (
                    f"{title}\n"
                    f"• Author: {author}\n"
                    f"• Branch: {branch}\n"
                    f"• Pipeline: <{server_url}/{repository}/actions/runs/{run_id}|View Run>\n"
                    f"• Failed Jobs:\n" + "\n".join(failed_job_lines)
                ),
            },
        }
    ]

    print({"blocks": blocks})

    # Send to Slack; the timeout keeps a stalled connection from hanging the job.
    response = requests.post(webhook_url, json={"blocks": blocks}, timeout=30)
    response.raise_for_status()


if __name__ == "__main__":
    send_slack_notification()
|
.github/workflows/_build_container.yml
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: ~Build container template
|
| 2 |
+
on:
|
| 3 |
+
workflow_call:
|
| 4 |
+
inputs:
|
| 5 |
+
image-name:
|
| 6 |
+
required: true
|
| 7 |
+
type: string
|
| 8 |
+
description: "The name of the image to build"
|
| 9 |
+
dockerfile:
|
| 10 |
+
required: true
|
| 11 |
+
type: string
|
| 12 |
+
runner:
|
| 13 |
+
required: false
|
| 14 |
+
default: self-hosted-azure-builder
|
| 15 |
+
type: string
|
| 16 |
+
description: "The runner to use for the build"
|
| 17 |
+
|
| 18 |
+
jobs:
|
| 19 |
+
pre-flight:
|
| 20 |
+
runs-on: ubuntu-latest
|
| 21 |
+
outputs:
|
| 22 |
+
build_args: ${{ steps.manifest.outputs.BUILD_ARGS }}
|
| 23 |
+
cache-from: ${{ steps.cache_from.outputs.LAST_PRS }}
|
| 24 |
+
steps:
|
| 25 |
+
- name: Checkout repository
|
| 26 |
+
uses: actions/checkout@v4
|
| 27 |
+
|
| 28 |
+
- name: Parse manifest.json
|
| 29 |
+
id: manifest
|
| 30 |
+
run: |
|
| 31 |
+
BUILD_ARGS=$(cat << EOF
|
| 32 |
+
BASE_IMAGE=$(cat requirements/manifest.json | jq -r '."ngc-pytorch"')
|
| 33 |
+
TRTLLM_REPO=$(cat requirements/manifest.json | jq -r '."vcs-dependencies"."trt-llm".repo')
|
| 34 |
+
TRTLLM_TAG=$(cat requirements/manifest.json | jq -r '."vcs-dependencies"."trt-llm".ref')
|
| 35 |
+
MLM_REPO=$(cat requirements/manifest.json | jq -r '."vcs-dependencies"."megatron-lm".repo')
|
| 36 |
+
MLM_TAG=$(cat requirements/manifest.json | jq -r '."vcs-dependencies"."megatron-lm".ref')
|
| 37 |
+
TE_REPO=$(cat requirements/manifest.json | jq -r '."vcs-dependencies".transformer_engine.repo')
|
| 38 |
+
TE_TAG=$(cat requirements/manifest.json | jq -r '."vcs-dependencies".transformer_engine.ref')
|
| 39 |
+
APEX_REPO=$(cat requirements/manifest.json | jq -r '."vcs-dependencies".apex.repo')
|
| 40 |
+
APEX_TAG=$(cat requirements/manifest.json | jq -r '."vcs-dependencies".apex.ref')
|
| 41 |
+
EOF
|
| 42 |
+
)
|
| 43 |
+
|
| 44 |
+
echo "BUILD_ARGS<<EOF" >> $GITHUB_OUTPUT
|
| 45 |
+
echo "$BUILD_ARGS" >> $GITHUB_OUTPUT
|
| 46 |
+
echo "EOF" >> $GITHUB_OUTPUT
|
| 47 |
+
|
| 48 |
+
- name: Get last merged PR
|
| 49 |
+
id: cache_from
|
| 50 |
+
env:
|
| 51 |
+
GH_TOKEN: ${{ github.token }}
|
| 52 |
+
run: |
|
| 53 |
+
LAST_PRS=$(gh api graphql -f query='
|
| 54 |
+
query {
|
| 55 |
+
repository(owner: "NVIDIA", name: "NeMo") {
|
| 56 |
+
pullRequests(states: MERGED, first: 100, orderBy: {field: UPDATED_AT, direction: DESC}) {
|
| 57 |
+
nodes {
|
| 58 |
+
number
|
| 59 |
+
}
|
| 60 |
+
}
|
| 61 |
+
}
|
| 62 |
+
}' | jq -r '.data.repository.pullRequests.nodes[].number' | while read -r number; do
|
| 63 |
+
echo "nemoci.azurecr.io/${{ inputs.image-name }}-buildcache:$number"
|
| 64 |
+
done)
|
| 65 |
+
|
| 66 |
+
echo "LAST_PRS<<EOF" >> $GITHUB_OUTPUT
|
| 67 |
+
echo "$LAST_PRS" >> $GITHUB_OUTPUT
|
| 68 |
+
echo "EOF" >> $GITHUB_OUTPUT
|
| 69 |
+
|
| 70 |
+
build:
|
| 71 |
+
uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/[email protected]
|
| 72 |
+
needs: [pre-flight]
|
| 73 |
+
with:
|
| 74 |
+
image-name: ${{ inputs.image-name }}
|
| 75 |
+
dockerfile: ${{ inputs.dockerfile }}
|
| 76 |
+
image-label: nemo-core
|
| 77 |
+
build-args: |
|
| 78 |
+
IMAGE_LABEL=nemo-core
|
| 79 |
+
NEMO_TAG=${{ github.sha }}
|
| 80 |
+
NEMO_REPO=https://github.com/NVIDIA/NeMo
|
| 81 |
+
PR_NUMBER=${{ github.event.pull_request.number || 0 }}
|
| 82 |
+
${{ needs.pre-flight.outputs.build_args }}
|
| 83 |
+
prune-filter-timerange: 24h
|
| 84 |
+
use-inline-cache: false
|
| 85 |
+
cache-from: |
|
| 86 |
+
nemoci.azurecr.io/${{ inputs.image-name }}-buildcache:main
|
| 87 |
+
nemoci.azurecr.io/${{ inputs.image-name }}-buildcache:${{ github.event.pull_request.number || 0 }}
|
| 88 |
+
${{ needs.pre-flight.outputs.cache-from }}
|
| 89 |
+
runner: ${{ inputs.runner }}
|
.github/workflows/_bump_mcore_tag.yml
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Reusable workflow with two jobs:
#   1. `update-branch` creates or refreshes `weekly-bump-<nemo-target-branch>`
#      from the target branch and pushes it.
#   2. `mcore` calls the shared bump template with the Megatron-LM source ref
#      and the manifest entry it should update.
name: ~Bump Megatron Tag template
on:
  workflow_call:
    inputs:
      nemo-target-branch:
        required: true
        type: string
        description: "The target branch to bump"
      mcore-target-branch:
        required: true
        type: string
        description: "The Megatron-LM ref used as the source of the bump"
      pr-reviewers:
        # The `mcore` job already read `inputs.pr-reviewers`, but the input was
        # never declared. Optional with an empty default so existing callers
        # keep working unchanged.
        required: false
        type: string
        default: ""
        description: "Reviewers forwarded to the bump template (optional)"
    secrets:
      PAT:
        required: true

jobs:
  update-branch:
    runs-on: ubuntu-latest
    steps:
      # v4 matches the checkout version used by the other workflows here.
      - uses: actions/checkout@v4
        with:
          ref: ${{ inputs.nemo-target-branch }}

      - name: Set Git config
        # NOTE(review): the e-mail below appears to be an obfuscation
        # placeholder introduced when this copy was extracted; restore the
        # real address from the upstream repository.
        run: |
          git config --local user.email "[email protected]"
          git config --local user.name "Github Actions"

      - name: Merge weekly-bump-${{ inputs.nemo-target-branch }} back to base branch
        env:
          SOURCE_BRANCH: weekly-bump-${{ inputs.nemo-target-branch }}
          TARGET_BRANCH: ${{ inputs.nemo-target-branch }}
        run: |
          # Branch names are quoted so unusual characters cannot be word-split
          # or glob-expanded by the shell.
          if git ls-remote --exit-code origin "$SOURCE_BRANCH"; then
            # The bump branch already exists: bring it up to date with the
            # target branch through an explicit merge commit.
            git fetch --unshallow
            git checkout "$SOURCE_BRANCH"
            git pull
            git merge --no-ff "$TARGET_BRANCH" -m "chore: Auto-merge $TARGET_BRANCH into $SOURCE_BRANCH"
          else
            # First run: start the bump branch from the target branch.
            git checkout -b "$SOURCE_BRANCH" "$TARGET_BRANCH"
          fi
          git push -u origin "$SOURCE_BRANCH"

  mcore:
    # NOTE(review): `[email protected]` appears to be an extraction artifact that
    # replaced the real `<workflow-file>.yml@<ref>`; restore it from upstream.
    uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/[email protected]
    needs: [update-branch]
    with:
      source-repository: NVIDIA/Megatron-LM
      source-ref: ${{ inputs.mcore-target-branch }}
      yaml-path: '."vcs-dependencies"."megatron-lm".ref'
      file: requirements/manifest.json
      base-branch: weekly-bump-${{ inputs.nemo-target-branch }}
      cicd-labels: Run CICD,no-fail-fast
      pr-reviewers: ${{ inputs.pr-reviewers }}
    secrets:
      PAT: ${{ secrets.PAT }}
|
.github/workflows/build-test-publish-wheel.yml
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2020-2021, NVIDIA CORPORATION.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
|
| 15 |
+
name: "Build, test, and publish a PyPi wheel (to testpypi)."

# Runs on every push to `main` and to branches matching `r**`.
on:
  push:
    branches: [main, "r**"]

defaults:
  run:
    # -x trace commands, -e exit on error, -u error on unset variables,
    # pipefail: a pipeline fails if any stage fails.
    shell: bash -x -e -u -o pipefail {0}

jobs:
  build-test-publish-wheel:
    # NOTE(review): `[email protected]` appears to be an extraction artifact that
    # replaced the real `<workflow-file>.yml@<ref>`; restore it from upstream.
    uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/[email protected]
    with:
      python-package: nemo
      python-version: "3.10"
      dry-run: true
    secrets:
      TWINE_USERNAME: ${{ secrets.TWINE_USERNAME }}
      TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD }}
      # The release Slack endpoint is what the template receives as SLACK_WEBHOOK.
      SLACK_WEBHOOK: ${{ secrets.SLACK_RELEASE_ENDPOINT }}
      SLACK_WEBHOOK_ADMIN: ${{ secrets.SLACK_WEBHOOK_ADMIN }}
|
.github/workflows/changelog-build.yml
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Manually triggered workflow that builds the changelog between a previous
# release tag and a release branch, inserts it into CHANGELOG.md on `main`,
# makes sure a label named after the release branch exists, and opens a pull
# request with the result.
name: 'Changelog Build (Release)'

on:
  workflow_dispatch:
    inputs:
      last-release-tag:
        description: Last Git tag to start from (exclusive) (e.g. `v2.0.0`)
        type: string
        required: true
      release-branch:
        description: Release branch to build changelog on (e.g. `r2.1.0`)
        type: string
        required: true
      changelog-main-content:
        description: Custom changelog content to include before detailed changelogs
        type: string
        required: false
        default: ''

jobs:
  changelog:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout branch
        uses: actions/checkout@v4
        with:
          ref: main
          # Full history so tags and branches can be compared.
          fetch-depth: 0

      - name: Build Changelog
        id: github_tag
        # NOTE(review): `[email protected]` appears to be an extraction artifact that
        # replaced the real `<action-name>@<ref>`; restore it from upstream.
        uses: mikepenz/[email protected]
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          # Configuration file is set up with filters for domains
          # owner:repo must point to the current repo
          # fromTag: taken from the `last-release-tag` input
          # toTag: taken from the `release-branch` input
          configuration: ".github/workflows/config/changelog-config.json"
          owner: ${{ github.repository_owner }}
          repo: ${{ github.event.repository.name }}
          ignorePreReleases: "false"
          failOnError: "false"
          fromTag: ${{ inputs.last-release-tag }}
          toTag: ${{ inputs.release-branch }}

      - name: Update changelog file
        env:
          RELEASE_BRANCH: ${{ inputs.release-branch }}
          CHANGELOG: ${{ steps.github_tag.outputs.changelog }}
          MAIN_CONTENT: ${{ inputs.changelog-main-content }}
        shell: bash -x -e -u -o pipefail {0}
        run: |
          # Strip the leading "r" from the branch name (r2.1.0 -> 2.1.0).
          RELEASE_VERSION=${RELEASE_BRANCH#r}
          # Demote generated headings by two levels: on each heading line the
          # first "#" becomes "###".
          CHANGELOG=$(printf '%s\n' "$CHANGELOG" | sed '/^[[:blank:]]*#/s/#/###/')

          # Build release notes starting with version header
          RELEASE_NOTES="## NVIDIA Neural Modules $RELEASE_VERSION"

          # Add custom content if provided
          if [ -n "$MAIN_CONTENT" ]; then
            RELEASE_NOTES="$RELEASE_NOTES

          $MAIN_CONTENT"
          fi

          # Add detailed changelogs section
          RELEASE_NOTES="$RELEASE_NOTES

          ### Detailed Changelogs:

          $CHANGELOG"

          # Insert the notes right after the marker line; sed reads them from stdin.
          printf "%s\n" "$RELEASE_NOTES" | sed '/<!-- Next changelog -->/r /dev/stdin' CHANGELOG.md > CHANGELOG.tmp.md

          mv CHANGELOG.tmp.md CHANGELOG.md

      - name: Inspect new changelog file
        run: cat CHANGELOG.md

      - name: Create or update label
        uses: actions/github-script@v6
        env:
          # Passed through the environment instead of being interpolated into
          # the script source, so the input value can never be parsed as
          # JavaScript.
          RELEASE_BRANCH: ${{ inputs.release-branch }}
        with:
          script: |
            const labelName = process.env.RELEASE_BRANCH;
            const labelColor = '0366d6'; // Blue color
            const labelDescription = `Release ${labelName}`;

            try {
              // Try to get the label
              await github.rest.issues.getLabel({
                owner: context.repo.owner,
                repo: context.repo.repo,
                name: labelName
              });
              console.log(`Label '${labelName}' already exists`);
            } catch (error) {
              if (error.status === 404) {
                // Label doesn't exist, create it
                await github.rest.issues.createLabel({
                  owner: context.repo.owner,
                  repo: context.repo.repo,
                  name: labelName,
                  color: labelColor,
                  description: labelDescription
                });
                console.log(`Created label '${labelName}'`);
              } else {
                throw error;
              }
            }

      - name: Create Pull Request
        uses: peter-evans/create-pull-request@v7
        with:
          commit-message: "beep boop: Update changelog"
          title: "Update changelog for `${{ inputs.release-branch }}`"
          signoff: true
          sign-commits: true
          base: main
          branch: bot/chore/update-changelog-into-${{ inputs.release-branch }}
          labels: ${{ inputs.release-branch }}
|
.github/workflows/cherry-pick-release-commit.yml
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: "Create PR to main with cherry-pick from release"

# Triggered by every push to `main`; all work is delegated to the shared
# reusable workflow referenced below.
on:
  push:
    branches: [main]

jobs:
  cherry-pick:
    # NOTE(review): `[email protected]` appears to be an extraction artifact that
    # replaced the real `<workflow-file>.yml@<ref>`; restore it from upstream.
    uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/[email protected]
    secrets:
      PAT: ${{ secrets.PAT }}
      SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
      SLACK_WEBHOOK_ADMIN: ${{ secrets.SLACK_WEBHOOK_ADMIN }}
|
.github/workflows/cicd-approve-test-queue.yml
ADDED
|
@@ -0,0 +1,175 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
|
| 15 |
+
# Queue manager for the "CICD NeMo" workflow. On a schedule it counts queued
# and in-progress runs and, while the total is below MAX_CONCURRENCY, approves
# the oldest runs that are waiting on a pending deployment. A second job posts
# to Slack when the first one fails.
name: Approve Test Queue

on:
  schedule:
    - cron: '*/5 * * * *' # Runs every 5 minutes
  workflow_dispatch: # Allows manual triggering

jobs:
  approve-queue:
    runs-on: ubuntu-latest
    environment: main
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install requests

      - name: Approve waiting deployments
        env:
          GITHUB_TOKEN: ${{ secrets.PAT }}
          MAX_CONCURRENCY: ${{ vars.MAX_CONCURRENCY || 1 }}
        # The heredoc delimiter is quoted so the shell passes the Python source
        # through verbatim (no `$` or backtick expansion).
        run: |
          python - <<'EOF'
          import os
          import sys

          import requests


          # GitHub API configuration
          GITHUB_TOKEN = os.environ["GITHUB_TOKEN"]
          REPO = os.environ["GITHUB_REPOSITORY"]
          MAX_CONCURRENCY = int(os.environ["MAX_CONCURRENCY"])
          API_BASE = f"https://api.github.com/repos/{REPO}"
          # Seconds to wait for the API before giving up, so a stalled
          # connection cannot hang the job.
          REQUEST_TIMEOUT = 30

          # Headers for GitHub API
          headers = {
              "Authorization": f"token {GITHUB_TOKEN}",
              "Accept": "application/vnd.github.v3+json",
              "X-GitHub-Api-Version": "2022-11-28",
          }

          def make_request(endpoint, method="GET", data=None):
              """Call the GitHub API and return the decoded JSON body.

              Args:
                  endpoint: Path relative to the repository API root.
                  method: "GET" issues a GET; any other value issues a POST.
                  data: JSON payload for POST requests.

              Returns:
                  The parsed JSON response, or None when the request failed.
                  For workflow-run listings the URL of the next page (or None)
                  is stored under the "next" key.
              """
              url = f"{API_BASE}/{endpoint}"
              try:
                  if method == "GET":
                      response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
                  else:
                      response = requests.post(url, headers=headers, json=data, timeout=REQUEST_TIMEOUT)
                  response.raise_for_status()
                  response_json = response.json()
                  # Only run listings are paginated here. Matching on the path
                  # prefix keeps this independent of query-parameter order.
                  if endpoint.startswith("actions/runs?"):
                      response_json["next"] = response.links.get("next", {}).get("url")

                  return response_json
              except requests.exceptions.RequestException as e:
                  print(f"Error making request to {endpoint}: {str(e)}")
                  # Connection errors and timeouts carry no response object.
                  if e.response is not None:
                      print(f"Response: {e.response.text}")
                  return None


          def get_workflow_runs(status):
              """Return all workflow runs with the given status, following pagination.

              Stops early, returning what was collected so far, if a page
              cannot be fetched.
              """
              all_results = []
              endpoint = f"actions/runs?status={status}"
              while endpoint:
                  response = make_request(endpoint)
                  if not response:
                      break

                  all_results.extend(response.get("workflow_runs", []))
                  endpoint = None
                  next_url = response.get("next")
                  if next_url:
                      # Reuse only the query string of the absolute "next" URL.
                      endpoint = f"actions/runs?{next_url.split('?')[1]}"

              return all_results


          # Get current running and queued workflows
          print("Fetching workflow runs...")
          queued_workflow_runs = get_workflow_runs("queued")
          in_progress_workflow_runs = get_workflow_runs("in_progress")

          # Count running and queued workflows
          queued_workflows = sum(1 for run in queued_workflow_runs if run["name"] == "CICD NeMo")
          in_progress_workflows = sum(1 for run in in_progress_workflow_runs if run["name"] == "CICD NeMo")

          total_workflows = queued_workflows + in_progress_workflows
          print(f"Current queued workflows: {queued_workflows}")
          print(f"Current running workflows: {in_progress_workflows}")
          print(f"Total workflows: {total_workflows}")
          print(f"Max concurrency: {MAX_CONCURRENCY}")

          if total_workflows >= MAX_CONCURRENCY:
              print("Maximum concurrency reached, no new approvals will be made")
              sys.exit(0)

          # Get waiting CI workflows for test environment
          print("Fetching deployments...")
          pending_workflows = get_workflow_runs("waiting")
          pending_workflows = [run for run in pending_workflows if run["name"] == "CICD NeMo"]

          # Sort deployments by creation date (oldest first)
          print("Sorting workflows...")
          pending_workflows = sorted(pending_workflows, key=lambda x: x["created_at"])

          # Process each deployment
          print("Processing ...")
          for workflow in pending_workflows:
              if total_workflows >= MAX_CONCURRENCY:
                  print("Maximum concurrency reached, stopping approvals")
                  break

              workflow_id = workflow["id"]
              workflow_name = workflow["display_title"]
              print(f"Approving workflow {workflow_name} with Run Id: {workflow_id}")

              deployment_url = f"actions/runs/{workflow_id}/pending_deployments"
              deployments = make_request(deployment_url)
              if deployments is None:
                  # The request itself failed; make_request already logged why.
                  print(f"Failed to fetch pending deployments for run {workflow_id}")
                  sys.exit(1)
              if not deployments:
                  # Nothing left to approve for this run; move on to the next.
                  print(f"No pending deployments for run {workflow_id}, skipping")
                  continue

              deployment = deployments[0]
              environment_id = deployment["environment"]["id"]

              # Approve the deployment
              status_data = {
                  "environment_ids": [environment_id],
                  "state": "approved",
                  "comment": "Automatically approved by queue manager"
              }
              result = make_request(deployment_url, method="POST", data=status_data)

              if result:
                  total_workflows += 1
              else:
                  # Report the run id, which is always available here.
                  print(f"Failed to approve pending deployment for run {workflow_id}")
                  sys.exit(1)

          EOF

  notify:
    if: failure()
    runs-on: ubuntu-latest
    needs: [approve-queue]
    steps:
      - name: Notify
        env:
          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
          SLACK_WEBHOOK_ADMIN: <!subteam^${{ secrets.SLACK_WEBHOOK_ADMIN }}>
          GITHUB_RUN_ID: ${{ github.run_id }}
          GITHUB_REPOSITORY: ${{ github.repository }}
        run: |
          curl -X POST \
            -H 'Content-type: application/json' \
            --data "{\"text\":\":robot_joy: <https://github.com/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}|Test-queue-approval-bot workflow> failed. Please review manually.\n\ncc ${SLACK_WEBHOOK_ADMIN}\"}" \
            "$SLACK_WEBHOOK"
|
.github/workflows/cicd-main-nemo2.yml
ADDED
|
@@ -0,0 +1,299 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2025, NVIDIA CORPORATION.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
# Reusable workflow: builds the CI container, then runs one job per matrix
# entry through the shared test-template action.
name: NeMo E2E NeMo2 Tests
on:
  workflow_call:
    inputs:
      test_to_run:
        type: string
        required: true
      image-name:
        type: string
        required: false
        default: nemo_container_nemo2

jobs:
  build:
    uses: ./.github/workflows/_build_container.yml
    with:
      image-name: ${{ inputs.image-name }}
      dockerfile: docker/Dockerfile.ci

  e2e-tests:
    needs: [build]
    runs-on: ${{ matrix.runner }}
    # Optional tests get a PLEASEFIXME_ prefix in their job name.
    name: ${{ matrix.is-optional && 'PLEASEFIXME_' || '' }}${{ matrix.script }}
    strategy:
      fail-fast: false
      matrix:
        # One entry per test script. `runner` selects the runner label;
        # `is-optional` and `timeout` are per-entry overrides read by the
        # steps below (defaults: false and 10).
        include:
          - {script: L2_NeMo_2_GPT_Pretraining_no_transformer_engine, runner: self-hosted-azure}
          - {script: L2_NeMo_2_llama3_pretraining_recipe, runner: self-hosted-azure}
          # - script: L2_NeMo_2_llama3_pytorch_profiler
          #   runner: self-hosted-azure
          #   timeout: 20
          - {script: L2_NeMo_2_llama3_fault_tolerance_plugin, runner: self-hosted-azure}
          - {script: L2_NeMo_2_llama3_straggler_detection, runner: self-hosted-azure}
          - {script: L2_NeMo_2_llama3_local_ckpt, runner: self-hosted-azure}
          - {script: L2_NeMo_2_GPT_DDP_Param_Parity_check, runner: self-hosted-azure}
          - {script: L2_NeMo_2_Hyena_Conversion_from_HF, runner: self-hosted-azure}
          - {script: L2_NeMo_2_Hyena_DDP_Pretraining_Test, runner: self-hosted-azure}
          - {script: L2_NeMo_2_Hyena_Mixer_Test, runner: self-hosted-azure-gpus-2-h100}
          - {script: L2_NeMo_2_Hyena_PP_Pretraining_Test, runner: self-hosted-azure}
          - {script: L2_NeMo_2_Hyena_TP_Pretraining_Test, runner: self-hosted-azure}
          - {script: L2_NeMo_2_Hyena_CP_Pretraining_Test, runner: self-hosted-azure}
          - {script: L2_NeMo_2_SSM_Pretraining, runner: self-hosted-azure}
          - {script: L2_NeMo_2_SSM_Finetuning, runner: self-hosted-azure-gpus-2-h100}
          - {script: L2_NeMo_2_HF_MODEL_IMPORT, runner: self-hosted-azure}
          - {script: L2_NeMo_2_jit_callback, runner: self-hosted-azure}
          - {script: L2_NeMo_2_T5_Pretraining, runner: self-hosted-azure}
          - {script: L2_NeMo_2_T5_MockData_Pretraining, runner: self-hosted-azure}
          - {script: L2_NeMo_2_T5_Finetuning, runner: self-hosted-azure}
          - {script: L2_NeMo_2_T5_Squad, runner: self-hosted-azure}
          - {script: L2_NeMo_2_T5_LoRA, runner: self-hosted-azure}
          - {script: L2_NeMo_2_BERT_Pretraining_Megatron, runner: self-hosted-azure}
          - {script: L2_NeMo_2_BERT_Pretraining_HuggingFace, runner: self-hosted-azure}
          - {script: L2_NeMo_2_NEVA_MOCK_PRETRAIN_TP2, runner: self-hosted-azure-gpus-2-h100}
          - {script: L2_NeMo_2_NEVA_MOCK_PRETRAIN_PP2, runner: self-hosted-azure-gpus-2-h100}
          - {script: L2_NeMo_2_NEVA_MOCK_PRETRAIN_CP2, runner: self-hosted-azure-gpus-2-h100}
          - {script: L2_NeMo_2_NEVA_MOCK_FINETUNE_TP2, runner: self-hosted-azure-gpus-2-h100}
          - {script: L2_NeMo_2_NEVA_ENERGON_FINETUNE_TP2, runner: self-hosted-azure-gpus-2-h100}
          - {script: L2_NeMo_2_NEVA_MOCK_FINETUNE_PP2, runner: self-hosted-azure-gpus-2-h100}
          - {script: L2_NeMo_2_NEVA_MOCK_FINETUNE_CP2, runner: self-hosted-azure-gpus-2-h100}
          - {script: L2_NeMo_2_NEVA_PRELOADED_FINETUNE_PP2_SEQPACK_PAD, runner: self-hosted-azure-gpus-2-h100}
          - {script: L2_NeMo_2_NEVA_PRELOADED_FINETUNE_PP2_SEQPACK_TRUNC, runner: self-hosted-azure-gpus-2-h100}
          - {script: L2_NeMo_2_NEVA_LOAD_GENERATE, runner: self-hosted-azure-gpus-1}
          - {script: L2_NeMo_2_LLAVA_IMPORT, runner: self-hosted-azure-gpus-1}
          - {script: L2_NEMO_2_MLLAMA_Inference, runner: self-hosted-azure-gpus-1}
          - {script: L2_NeMo_2_MLLAMA_MOCK_FINETUNE_TP2, runner: self-hosted-azure}
          - {script: L2_NeMo_2_MLLAMA_PRELOADED_FINETUNE_TP2, runner: self-hosted-azure}
          - {script: L2_NeMo_2_MLLAMA_ENERGON_FINETUNE_TP2, runner: self-hosted-azure}
          - {script: L2_NeMo_2_MLLAMA_IMPORT, runner: self-hosted-azure-gpus-1}
          - {script: L2_NeMo_2_Mixtral_Pretraining, runner: self-hosted-azure}
          - {script: L2_NeMo_2_GPT_SFT_TP1PP1_MBS1, runner: self-hosted-azure}
          - {script: L2_NeMo_2_GPT_SFT_TP1PP1_MBS2, runner: self-hosted-azure}
          - {script: L2_NeMo_2_GPT_SFT_TP1PP2_MBS2, runner: self-hosted-azure}
          - {script: L2_NeMo_2_GPT_SFT_TP2PP1_MBS2, runner: self-hosted-azure}
          - {script: L2_NeMo_2_GPT_SFT_TP1PP1_MBS1_PACKED, runner: self-hosted-azure}
          - {script: L2_NeMo_2_GPT_LoRA_TP1PP1_MBS1, runner: self-hosted-azure}
          - {script: L2_NeMo_2_GPT_LoRA_TP1PP1_MBS2, runner: self-hosted-azure}
          - {script: L2_NeMo_2_GPT_LoRA_TP1PP2_MBS2, runner: self-hosted-azure}
          - {script: L2_NeMo_2_GPT_LoRA_TP2PP1_MBS2, runner: self-hosted-azure}
          - {script: L2_NeMo_2_GPT_LoRA_TP1PP1_MBS1_PACKED, runner: self-hosted-azure}
          - {script: L2_NeMo_2_GPT_DoRA_TP1PP1_MBS1_PACKED, runner: self-hosted-azure}
          - {script: L2_NeMo_2_GPT_CLoRA_TP1PP1_MBS1_PACKED, runner: self-hosted-azure}
          - {script: L2_NeMo_2_GPT_LoRA_TP1PP1_MBS1_Chat, runner: self-hosted-azure}
          - {script: L2_NeMo_2_GPT_LoRA_TP1PP1_MBS1_TE_op_fuser, runner: self-hosted-azure}
          - {script: L2_NeMo_2_Mixtral_LoRA_EP2PP1_MBS2_exclude, runner: self-hosted-azure}
          - {script: L2_NeMo_2_Mixtral_LoRA_EP2PP1_MBS2, runner: self-hosted-azure}
          - {script: L2_NeMo_2_Mixtral_LoRA_TP1PP1_MBS1, runner: self-hosted-azure}
          - {script: L2_NeMo_2_Mixtral_LoRA_TP2PP1_MBS1, runner: self-hosted-azure}
          - {script: L2_NeMo_2_Mistral_LoRA_TP1PP1_MBS1, runner: self-hosted-azure}
          - {script: L2_NeMo_2_Mistral_LoRA_TP1PP1_MBS1_exclude, runner: self-hosted-azure}
          - {script: L2_NeMo_2_Mistral_LoRA_TP2PP1_MBS1, runner: self-hosted-azure}
          - {script: L2_NEMO_2_LoRA_MERGE, runner: self-hosted-azure}
          - {script: L2_NEMO_2_LoRA_Inference, runner: self-hosted-azure-gpus-1}
          - {script: L2_NeMo_2_NeMo_Mcore_Mixtral_bitexact, runner: self-hosted-azure, is-optional: true}
          - {script: L2_NeMo_2_PTQ_Llama2_FP8_trtllm, runner: self-hosted-azure}
          - {script: L2_NeMo_2_PTQ_Llama2_FP8_nemo, runner: self-hosted-azure}
          - {script: L2_NeMo_2_Distill_Llama3_TP1PP2, runner: self-hosted-azure}
          - {script: L2_NeMo_2_Prune_Llama_TP1PP2, runner: self-hosted-azure}
          - {script: L2_NeMo_2_GPT_Speculative_Llama3_TP2PP1, runner: self-hosted-azure}
          - {script: L2_NeMo_2_LLAVA_NEXT_MOCK_TRAINING, runner: self-hosted-azure}
          - {script: L2_NeMo_2_LLAVA_NEXT_HF_CONVERSION, runner: self-hosted-azure}
          - {script: L2_NeMo_2_LLAVA_NEXT_ENERGON_TRAIN, runner: self-hosted-azure}
          - {script: L2_NeMo_2_LLAVA_NEXT_ENERGON_PACKED_TRAIN, runner: self-hosted-azure}
          - {script: L2_NeMo_2_AVLM_MOCK_TRAINING, runner: self-hosted-azure}
          - {script: L2_NeMo_2_AVLM_ENERGON_TRAIN, runner: self-hosted-azure}
          - {script: L2_NeMo_2_AVLM_ENERGON_CP2_TRAIN, runner: self-hosted-azure}
          - {script: L2_NeMo_2_CLIP_PRETRAIN, runner: self-hosted-azure, timeout: 20}
          - {script: L2_NeMo_2_CLIP_INFER, runner: self-hosted-azure}
          - {script: L2_NeMo_2_Auto_Configurator_llama_TP1_PP1_MBS124, runner: self-hosted-azure-gpus-1}
          - {script: L2_NeMo_2_Auto_Configurator_bert_TP1_PP1_MBS124, runner: self-hosted-azure-gpus-1}
          - {script: L2_NeMo_2_Auto_Configurator_t5_TP1_PP1_MBS124, runner: self-hosted-azure-gpus-1}
          - {script: L2_NeMo_2_Auto_Configurator_callbacks, runner: self-hosted-azure-gpus-1}
          - {script: L2_NeMo_2_Conversion_Test_Baichuan2, runner: self-hosted-azure}
          - {script: L2_NeMo_2_Conversion_Test_ChatGLM, runner: self-hosted-azure}
          - {script: L2_NeMo_2_Conversion_Test_DeepSeek, runner: self-hosted-azure}
          - {script: L2_NeMo_2_Conversion_Test_Gemma, runner: self-hosted-azure}
          - {script: L2_NeMo_2_Conversion_Test_Gemma2, runner: self-hosted-azure}
          - {script: L2_NeMo_2_Conversion_Test_Gemma3_llm, runner: self-hosted-azure}
          - {script: L2_NeMo_2_Conversion_Test_Gemma3_vlm, runner: self-hosted-azure}
          - {script: L2_NeMo_2_Conversion_Test_Mistral, runner: self-hosted-azure}
          - {script: L2_NeMo_2_Conversion_Test_Llama, runner: self-hosted-azure}
          - {script: L2_NeMo_2_Conversion_Test_Llama_Embedding, runner: self-hosted-azure}
          - {script: L2_NeMo_2_Conversion_Test_Llama4, runner: self-hosted-azure}
          - {script: L2_NeMo_2_Conversion_Test_Llama4_Text, runner: self-hosted-azure}
          - {script: L2_NeMo_2_PTQ_Llama4_FP8_nemo, runner: self-hosted-azure}
          - {script: L2_NeMo_2_Conversion_Test_Nemotron, runner: self-hosted-azure}
          - {script: L2_NeM

o_2_Conversion_Test_Nemotron_H_4B, runner: self-hosted-azure}
          - {script: L2_NeMo_2_Conversion_Test_Phi3Mini, runner: self-hosted-azure}
          - {script: L2_NeMo_2_Conversion_Test_Qwen2, runner: self-hosted-azure}
          - {script: L2_NeMo_2_Conversion_Test_Qwen3, runner: self-hosted-azure}
          - {script: L2_NeMo_2_Conversion_Test_Starcoder, runner: self-hosted-azure}
          - {script: L2_NeMo_2_Conversion_Test_Starcoder2, runner: self-hosted-azure}
          - {script: L2_NeMo_2_Conversion_Test_BERT, runner: self-hosted-azure}
          - {script: L2_NeMo_2_Conversion_Test_T5, runner: self-hosted-azure}
          - {script: L2_NeMo_2_QWEN2VL_MOCK_FINETUNE_TP2, runner: self-hosted-azure}
          - {script: L2_NeMo_2_QWEN2VL_PRELOADED_FINETUNE_TP2, runner: self-hosted-azure}
          - {script: L2_NeMo_2_QWEN2VL_ENERGON_FINETUNE_TP2, runner: self-hosted-azure}
          - {script: L2_NeMo_2_LLAMA4_MOCK_FINETUNE_PP2, runner: self-hosted-azure}
          - {script: L2_NeMo_2_LLAMA4_MOCK_FINETUNE_CP2, runner: self-hosted-azure}
          - {script: L2_NeMo_2_LLAMA4_ENERGON_FINETUNE_EP2, runner: self-hosted-azure}
          - {script: L2_NeMo_2_Diffusion_Recipe_Test, runner: self-hosted-azure}
          - {script: L2_NeMo_2_Diffusion_Taskencoder_Test, runner: self-hosted-azure}
          - {script: L2_NeMo_2_Flux_Import_Test, runner: self-hosted-azure, is-optional: true}
          - {script: L2_NeMo_2_Flux_Inference_Test, runner: self-hosted-azure}
          - {script: L2_NeMo_2_Flux_Training_DDP_Test, runner: self-hosted-azure}
          - {script: L2_NeMo_2_Flux_Training_FSDP_Test, runner: self-hosted-azure}
          - {script: L2_NeMo_2_Flux_ControlNet_Training_DDP_Test, runner: self-hosted-azure}
          - {script: L2_NeMo_2_Flux_ControlNet_Training_FSDP_Test, runner: self-hosted-azure, is-optional: true}
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          # Check out into a directory named after the run id.
          path: ${{ github.run_id }}
      - name: main
        uses: NVIDIA/NeMo/.github/actions/test-template@main
        with:
          runner: ${{ runner.name }}
          script: ${{ matrix.script }}
          tests_to_run: ${{ inputs.test_to_run }}
          image: ${{ inputs.image-name }}
          is_optional: ${{ matrix.is-optional || false }}
          timeout: ${{ matrix.timeout || 10 }}
|
.github/workflows/cicd-main-speech.yml
ADDED
|
@@ -0,0 +1,216 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2025, NVIDIA CORPORATION.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
name: NeMo E2E Speech Tests
|
| 15 |
+
on:
|
| 16 |
+
workflow_call:
|
| 17 |
+
inputs:
|
| 18 |
+
test_to_run:
|
| 19 |
+
required: true
|
| 20 |
+
type: string
|
| 21 |
+
image-name:
|
| 22 |
+
required: false
|
| 23 |
+
default: nemo_container_speech
|
| 24 |
+
type: string
|
| 25 |
+
|
| 26 |
+
jobs:
|
| 27 |
+
build:
|
| 28 |
+
uses: ./.github/workflows/_build_container.yml
|
| 29 |
+
with:
|
| 30 |
+
image-name: ${{ inputs.image-name }}
|
| 31 |
+
dockerfile: docker/Dockerfile.ci
|
| 32 |
+
|
| 33 |
+
unit-tests:
|
| 34 |
+
strategy:
|
| 35 |
+
fail-fast: false
|
| 36 |
+
matrix:
|
| 37 |
+
include:
|
| 38 |
+
- script: L0_Unit_Tests_GPU_ASR
|
| 39 |
+
runner: self-hosted-azure-gpus-1
|
| 40 |
+
timeout: 30
|
| 41 |
+
- script: L0_Unit_Tests_CPU_ASR
|
| 42 |
+
runner: azure-gpu-vm-runner1-cpu
|
| 43 |
+
cpu-only: true
|
| 44 |
+
timeout: 30
|
| 45 |
+
- script: L0_Unit_Tests_GPU_TTS
|
| 46 |
+
runner: self-hosted-azure-gpus-1
|
| 47 |
+
- script: L0_Unit_Tests_CPU_TTS
|
| 48 |
+
runner: self-hosted-azure-cpu
|
| 49 |
+
cpu-only: true
|
| 50 |
+
- script: L0_Unit_Tests_GPU_Audio
|
| 51 |
+
runner: self-hosted-azure-gpus-1
|
| 52 |
+
- script: L0_Unit_Tests_CPU_Audio
|
| 53 |
+
runner: self-hosted-azure-cpu
|
| 54 |
+
cpu-only: true
|
| 55 |
+
- script: L0_Unit_Tests_GPU_SpeechLM2
|
| 56 |
+
runner: self-hosted-azure-gpus-1
|
| 57 |
+
timeout: 20
|
| 58 |
+
- script: L0_Unit_Tests_CPU_SpeechLM2
|
| 59 |
+
runner: self-hosted-azure-cpu
|
| 60 |
+
cpu-only: true
|
| 61 |
+
timeout: 20
|
| 62 |
+
needs: [build]
|
| 63 |
+
runs-on: ${{ matrix.runner }}
|
| 64 |
+
name: ${{ matrix.script }}
|
| 65 |
+
steps:
|
| 66 |
+
- name: Checkout
|
| 67 |
+
uses: actions/checkout@v4
|
| 68 |
+
with:
|
| 69 |
+
path: ${{ github.run_id }}
|
| 70 |
+
- name: main
|
| 71 |
+
uses: NVIDIA/NeMo/.github/actions/test-template@main
|
| 72 |
+
with:
|
| 73 |
+
runner: ${{ runner.name }}
|
| 74 |
+
script: ${{ matrix.script }}
|
| 75 |
+
is_unit_test: true
|
| 76 |
+
tests_to_run: ${{ inputs.test_to_run }}
|
| 77 |
+
image: ${{ inputs.image-name }}
|
| 78 |
+
timeout: ${{ matrix.timeout || 10 }}
|
| 79 |
+
cpu-only: ${{ matrix.cpu-only || false }}
|
| 80 |
+
is_optional: ${{ matrix.is-optional || false }}
|
| 81 |
+
|
| 82 |
+
e2e-tests:
|
| 83 |
+
strategy:
|
| 84 |
+
fail-fast: false
|
| 85 |
+
matrix:
|
| 86 |
+
include:
|
| 87 |
+
- runner: self-hosted-azure-gpus-1
|
| 88 |
+
script: ASR_dev_run_Speech_to_Text
|
| 89 |
+
- runner: self-hosted-azure-gpus-1
|
| 90 |
+
script: ASR_dev_run_Speech_to_Text_WPE_CitriNet
|
| 91 |
+
- runner: self-hosted-azure-gpus-1
|
| 92 |
+
script: ASR_dev_run_Speech_Pre-training_-_CitriNet
|
| 93 |
+
- runner: self-hosted-azure-gpus-1
|
| 94 |
+
script: Optional_ASR_dev_run_Speech_To_Text_Finetuning
|
| 95 |
+
is-optional: true
|
| 96 |
+
- runner: self-hosted-azure-gpus-1
|
| 97 |
+
script: Optional_ASR_dev_run_Speech_To_Text_HF_Finetuning
|
| 98 |
+
is-optional: true
|
| 99 |
+
- runner: self-hosted-azure-gpus-1
|
| 100 |
+
script: ASR_dev_run_Speech_to_Text_WPE_-_Conformer
|
| 101 |
+
- runner: self-hosted-azure-gpus-1
|
| 102 |
+
script: ASR_dev_run_Speech_to_Text_Hybrid_RNNT_CTC_Prompt
|
| 103 |
+
- runner: self-hosted-azure-gpus-1
|
| 104 |
+
script: ASR_dev_run-part_two_Speech_to_Text_WPE_-_Squeezeformer
|
| 105 |
+
- runner: self-hosted-azure-gpus-1
|
| 106 |
+
script: L2_ASR_Multi-dataloader_dev_run_Speech_to_Text_multi-dataloader
|
| 107 |
+
- runner: self-hosted-azure-gpus-1
|
| 108 |
+
script: L2_ASR_Multi-dataloader_dev_run_Speech_to_Label_multi-dataloader
|
| 109 |
+
- runner: self-hosted-azure-gpus-1
|
| 110 |
+
script: L2_ASR_Adapters_Linear_Adapters
|
| 111 |
+
- runner: self-hosted-azure-gpus-1
|
| 112 |
+
script: L2_ASR_Adapters_RelPos_MHA_Adapters
|
| 113 |
+
- runner: self-hosted-azure
|
| 114 |
+
script: L2_Speech_to_Text_EMA
|
| 115 |
+
- runner: self-hosted-azure-gpus-1
|
| 116 |
+
script: L2_Speech_to_Text_AED
|
| 117 |
+
- runner: self-hosted-azure-gpus-1
|
| 118 |
+
script: L2_Speaker_dev_run_Speech_to_Label
|
| 119 |
+
- runner: self-hosted-azure
|
| 120 |
+
script: L2_Speech_Estimate_Duration_Bins
|
| 121 |
+
- runner: self-hosted-azure
|
| 122 |
+
script: L2_Speech_Batch_Size_OOMptimizer
|
| 123 |
+
- runner: self-hosted-azure
|
| 124 |
+
script: Optional_L2_Speech_Batch_Size_OOMptimizer_Canary
|
| 125 |
+
is-optional: true
|
| 126 |
+
- runner: self-hosted-azure
|
| 127 |
+
script: L2_Speech_Transcription_Speech_to_Text_Transcribe
|
| 128 |
+
- runner: self-hosted-azure
|
| 129 |
+
script: L2_Speech_Transcription_Speech_to_Text_Streaming_Infer
|
| 130 |
+
- runner: self-hosted-azure
|
| 131 |
+
script: L2_Speech_Transcription_Speech_to_Text_Cache_Aware_Infer
|
| 132 |
+
- runner: self-hosted-azure
|
| 133 |
+
script: L2_Speech_Transcription_Streaming_Inference
|
| 134 |
+
- runner: self-hosted-azure
|
| 135 |
+
script: L2_Speech_Transcription_Canary_Transcribe_Full_Manifest
|
| 136 |
+
- runner: self-hosted-azure
|
| 137 |
+
script: L2_Speech_Transcription_Canary_Transcribe_With_Prompt
|
| 138 |
+
- runner: self-hosted-azure
|
| 139 |
+
script: L2_Speech_Transcription_Canary_Transcribe_Audio_Dir
|
| 140 |
+
- runner: self-hosted-azure
|
| 141 |
+
script: L2_Speech_Transcription_Canary_Streaming_Full_Manifest
|
| 142 |
+
- runner: self-hosted-azure
|
| 143 |
+
script: L2_Longform_Speech_Transcription_Canary_Chunked_Infer_from_Audio_Dir
|
| 144 |
+
- runner: self-hosted-azure
|
| 145 |
+
script: L2_Longform_Speech_Transcription_with_TimeStamps_Canary_Chunked_Infer_from_Audio_Dir
|
| 146 |
+
- runner: self-hosted-azure
|
| 147 |
+
script: L2_Longform_Speech_Transcription_with_TimeStamps_Canary_Chunked_Infer_from_Manifest
|
| 148 |
+
- runner: self-hosted-azure-gpus-1
|
| 149 |
+
script: Speech_Checkpoints_tests
|
| 150 |
+
timeout: 20
|
| 151 |
+
- runner: self-hosted-azure-gpus-1
|
| 152 |
+
script: L2_Speaker_dev_run_Speaker_Recognition
|
| 153 |
+
- runner: self-hosted-azure-gpus-1
|
| 154 |
+
script: L2_Speaker_dev_run_Speaker_Diarization
|
| 155 |
+
- runner: self-hosted-azure-gpus-1
|
| 156 |
+
script: L2_Speaker_dev_run_EndtoEnd_Speaker_Diarization_Sortformer
|
| 157 |
+
- runner: self-hosted-azure
|
| 158 |
+
script: L2_Speaker_dev_run_EndtoEnd_Diarizer_Inference
|
| 159 |
+
- runner: self-hosted-azure
|
| 160 |
+
script: L2_Speaker_dev_run_Speaker_Diarization_with_ASR_Inference
|
| 161 |
+
- runner: self-hosted-azure
|
| 162 |
+
script: L2_Speaker_dev_run_Clustering_Diarizer_Inference
|
| 163 |
+
- runner: self-hosted-azure
|
| 164 |
+
script: L2_Speaker_dev_run_Neural_Diarizer_Inference
|
| 165 |
+
- runner: self-hosted-azure
|
| 166 |
+
script: L2_Speaker_dev_run_Multispeaker_ASR_Data_Simulation
|
| 167 |
+
- runner: self-hosted-azure
|
| 168 |
+
script: L2_Segmentation_Tool_Parallel_ctc_segmentation_test_L2_Eng_CitriNet_with_wav
|
| 169 |
+
- runner: self-hosted-azure
|
| 170 |
+
script: L2_Segmentation_Tool_Parallel_ctc_segmentation_test_L2_Ru_QN_with_mp3
|
| 171 |
+
- script: L2_SpeechLM_LoRA_TP1PP1_MBS2
|
| 172 |
+
runner: self-hosted-azure
|
| 173 |
+
- runner: self-hosted-azure-gpus-1
|
| 174 |
+
script: L2_TTS_Fast_dev_runs_1_Tacotron_2
|
| 175 |
+
- runner: self-hosted-azure
|
| 176 |
+
script: L2_TTS_Fast_dev_runs_1_WaveGlow
|
| 177 |
+
- runner: self-hosted-azure
|
| 178 |
+
script: L2_TTS_Fast_dev_runs_1_FastPitch
|
| 179 |
+
- runner: self-hosted-azure
|
| 180 |
+
script: L2_TTS_Fast_dev_runs_1_Hifigan
|
| 181 |
+
- runner: self-hosted-azure
|
| 182 |
+
script: L2_G2P_Models_G2P_Conformer_training_evaluation_and_inference
|
| 183 |
+
- runner: self-hosted-azure
|
| 184 |
+
script: SPEECHLM_HF_Training_DuplexS2S
|
| 185 |
+
- runner: self-hosted-azure
|
| 186 |
+
script: SPEECHLM_HF_Training_DuplexS2SSpeechDecoder
|
| 187 |
+
- runner: self-hosted-azure
|
| 188 |
+
script: SPEECHLM_HF_Training_SALM
|
| 189 |
+
timeout: 20
|
| 190 |
+
- runner: self-hosted-azure
|
| 191 |
+
script: L2_TTS_Fast_dev_runs_Magpietts_DecoderContext
|
| 192 |
+
- runner: self-hosted-azure
|
| 193 |
+
script: L2_TTS_Fast_dev_runs_Magpietts_MultiEncoder
|
| 194 |
+
- runner: self-hosted-azure
|
| 195 |
+
script: L2_TTS_Fast_dev_runs_Magpietts_OnlinePO
|
| 196 |
+
- runner: self-hosted-azure
|
| 197 |
+
script: L2_TTS_InferEvaluate_Magpietts_ZeroShot
|
| 198 |
+
- runner: self-hosted-azure
|
| 199 |
+
script: L2_TTS_InferEvaluate_Magpietts_SeenSpeakers
|
| 200 |
+
needs: [unit-tests]
|
| 201 |
+
runs-on: ${{ matrix.runner }}
|
| 202 |
+
name: ${{ matrix.is-optional && 'PLEASEFIXME_' || '' }}${{ matrix.script }}
|
| 203 |
+
steps:
|
| 204 |
+
- name: Checkout
|
| 205 |
+
uses: actions/checkout@v4
|
| 206 |
+
with:
|
| 207 |
+
path: ${{ github.run_id }}
|
| 208 |
+
- name: main
|
| 209 |
+
uses: NVIDIA/NeMo/.github/actions/test-template@main
|
| 210 |
+
with:
|
| 211 |
+
runner: ${{ runner.name }}
|
| 212 |
+
script: ${{ matrix.script }}
|
| 213 |
+
tests_to_run: ${{ inputs.test_to_run }}
|
| 214 |
+
image: ${{ inputs.image-name }}
|
| 215 |
+
timeout: ${{ matrix.timeout || 10 }}
|
| 216 |
+
is_optional: ${{ matrix.is-optional || false }}
|
.github/workflows/cicd-main-testcopy.yml
ADDED
|
@@ -0,0 +1,472 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2025, NVIDIA CORPORATION.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
name: "[debug] CICD NeMo"
|
| 15 |
+
on:
|
| 16 |
+
schedule:
|
| 17 |
+
- cron: 0 0 * * *
|
| 18 |
+
- cron: "*/5 * * * *" # Runs every 5 minutes
|
| 19 |
+
push:
|
| 20 |
+
branches:
|
| 21 |
+
- main
|
| 22 |
+
workflow_dispatch:
|
| 23 |
+
inputs:
|
| 24 |
+
test_to_run:
|
| 25 |
+
required: false
|
| 26 |
+
default: all
|
| 27 |
+
type: string
|
| 28 |
+
description: Comma-separated list of tests to run. Use "all" to run the full test suite.
|
| 29 |
+
|
| 30 |
+
jobs:
|
| 31 |
+
pre-flight:
|
| 32 |
+
runs-on: ubuntu-latest
|
| 33 |
+
outputs:
|
| 34 |
+
test_to_run: ${{ steps.test_to_run.outputs.main }}
|
| 35 |
+
is_ci_workload: ${{ steps.is_ci_workload.outputs.main }}
|
| 36 |
+
no_fail_fast: ${{ steps.no_fail_fast.outputs.main }}
|
| 37 |
+
components_to_run: ${{ steps.components_to_run.outputs.main }}
|
| 38 |
+
env:
|
| 39 |
+
TESTS_TO_RUN: ${{ inputs.test_to_run }}
|
| 40 |
+
EVENT_NAME: ${{ github.event_name }}
|
| 41 |
+
HAS_LABEL: ${{ github.event.label.name == 'Run CICD' }}
|
| 42 |
+
steps:
|
| 43 |
+
- name: Checkout branch
|
| 44 |
+
uses: actions/checkout@v4
|
| 45 |
+
with:
|
| 46 |
+
fetch-depth: 0
|
| 47 |
+
|
| 48 |
+
- name: Select components to run
|
| 49 |
+
id: components_to_run
|
| 50 |
+
run: |
|
| 51 |
+
pip install -U pip
|
| 52 |
+
pip install git-python
|
| 53 |
+
|
| 54 |
+
if [[ "$EVENT_NAME" == "pull_request" ]]; then
|
| 55 |
+
python .github/scripts/components_to_run.py --source-sha ${{ github.event.pull_request.head.sha }} --target-sha ${{ github.event.pull_request.base.sha }}
|
| 56 |
+
else
|
| 57 |
+
echo '["nemo2", "automodel", "export-deploy", "speech"]' | tee -a test_modules.json
|
| 58 |
+
fi
|
| 59 |
+
|
| 60 |
+
components_to_run=$(cat test_modules.json)
|
| 61 |
+
|
| 62 |
+
echo "main=${components_to_run}" | tee -a "$GITHUB_OUTPUT"
|
| 63 |
+
|
| 64 |
+
- name: Select tests to run
|
| 65 |
+
id: test_to_run
|
| 66 |
+
run: |
|
| 67 |
+
# For manual dispatch, run exactly the tests passed in via the `test_to_run` input
|
| 68 |
+
if [[ "$EVENT_NAME" == "workflow_dispatch" ]]; then
|
| 69 |
+
TESTS_TO_RUN=$TESTS_TO_RUN
|
| 70 |
+
|
| 71 |
+
# For a PR event triggered by the `Run CICD` label, run all tests
|
| 72 |
+
elif [[ "$EVENT_NAME" == "pull_request" && "$HAS_LABEL" == "true" ]]; then
|
| 73 |
+
TESTS_TO_RUN=all
|
| 74 |
+
|
| 75 |
+
# For a PR event without the `Run CICD` label, run no tests
|
| 76 |
+
elif [[ "$EVENT_NAME" == "pull_request" && "$HAS_LABEL" != "true" ]]; then
|
| 77 |
+
TESTS_TO_RUN=""
|
| 78 |
+
|
| 79 |
+
# For push and schedule events, run all tests. This is so that we can generate coverage
|
| 80 |
+
# on branch `main`.
|
| 81 |
+
elif [[ "$EVENT_NAME" == "push" || "$EVENT_NAME" == "schedule" ]]; then
|
| 82 |
+
TESTS_TO_RUN=all
|
| 83 |
+
|
| 84 |
+
else
|
| 85 |
+
echo "Unsupported event_name $EVENT_NAME provided".
|
| 86 |
+
exit 1
|
| 87 |
+
fi
|
| 88 |
+
|
| 89 |
+
parsed_string=$(echo "$TESTS_TO_RUN" | jq -c --raw-input 'split(",")')
|
| 90 |
+
echo "main=${parsed_string}" | tee -a "$GITHUB_OUTPUT"
|
| 91 |
+
|
| 92 |
+
- name: Check if this is a CI workload
|
| 93 |
+
shell: bash
|
| 94 |
+
id: is_ci_workload
|
| 95 |
+
run: |
|
| 96 |
+
branch_name=${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}
|
| 97 |
+
|
| 98 |
+
if [[ "$branch_name" =~ ^bump-ci-container || "$EVENT_NAME" == "schedule" ]]; then
|
| 99 |
+
is_ci_workload=true
|
| 100 |
+
echo "main=true" | tee -a "$GITHUB_OUTPUT"
|
| 101 |
+
else
|
| 102 |
+
is_ci_workload=false
|
| 103 |
+
fi
|
| 104 |
+
|
| 105 |
+
echo "main=$is_ci_workload" | tee -a "$GITHUB_OUTPUT"
|
| 106 |
+
|
| 107 |
+
- name: Check if no-fail-fast is set
|
| 108 |
+
shell: bash
|
| 109 |
+
id: no_fail_fast
|
| 110 |
+
env:
|
| 111 |
+
HAS_FAIL_FAST_LABEL: ${{ contains(github.event.pull_request.labels.*.name, 'no-fail-fast') }}
|
| 112 |
+
run: |
|
| 113 |
+
if [[ "$HAS_FAIL_FAST_LABEL" == "true" || "$EVENT_NAME" == "schedule" ]]; then
|
| 114 |
+
no_fail_fast=true
|
| 115 |
+
else
|
| 116 |
+
no_fail_fast=false
|
| 117 |
+
fi
|
| 118 |
+
|
| 119 |
+
echo "main=$no_fail_fast" | tee -a "$GITHUB_OUTPUT"
|
| 120 |
+
|
| 121 |
+
code-linting:
|
| 122 |
+
if: needs.pre-flight.outputs.test_to_run != '[]'
|
| 123 |
+
needs: [pre-flight]
|
| 124 |
+
uses: ./.github/workflows/code-linting.yml
|
| 125 |
+
|
| 126 |
+
cicd-wait-in-queue:
|
| 127 |
+
needs: [pre-flight]
|
| 128 |
+
runs-on: ubuntu-latest
|
| 129 |
+
environment: test
|
| 130 |
+
if: |
|
| 131 |
+
needs.pre-flight.outputs.test_to_run != '[]'
|
| 132 |
+
&& needs.pre-flight.outputs.is_ci_workload == 'false'
|
| 133 |
+
steps:
|
| 134 |
+
- name: Running CI tests
|
| 135 |
+
run: |
|
| 136 |
+
echo "Running CI tests"
|
| 137 |
+
|
| 138 |
+
cicd-test-container-build:
|
| 139 |
+
uses: ./.github/workflows/_build_container.yml
|
| 140 |
+
needs: [pre-flight, code-linting, cicd-wait-in-queue]
|
| 141 |
+
if: |
|
| 142 |
+
needs.pre-flight.outputs.test_to_run != '[]'
|
| 143 |
+
&& (
|
| 144 |
+
success()
|
| 145 |
+
|| (
|
| 146 |
+
needs.cicd-wait-in-queue.result == 'skipped'
|
| 147 |
+
&& needs.pre-flight.outputs.is_ci_workload == 'true'
|
| 148 |
+
)
|
| 149 |
+
)
|
| 150 |
+
&& !cancelled()
|
| 151 |
+
with:
|
| 152 |
+
image-name: nemo_container
|
| 153 |
+
dockerfile: docker/Dockerfile.ci
|
| 154 |
+
|
| 155 |
+
# cicd-import-tests:
|
| 156 |
+
# if: |
|
| 157 |
+
# needs.pre-flight.outputs.test_to_run != '[]'
|
| 158 |
+
# && (
|
| 159 |
+
# success()
|
| 160 |
+
# || (
|
| 161 |
+
# needs.cicd-wait-in-queue.result == 'skipped'
|
| 162 |
+
# && needs.pre-flight.outputs.is_ci_workload == 'true'
|
| 163 |
+
# )
|
| 164 |
+
# )
|
| 165 |
+
# && !cancelled()
|
| 166 |
+
# needs: [cicd-test-container-build, pre-flight]
|
| 167 |
+
# runs-on: self-hosted-azure-gpus-1
|
| 168 |
+
# steps:
|
| 169 |
+
# - name: Create UUID
|
| 170 |
+
# id: uuid
|
| 171 |
+
# run: |
|
| 172 |
+
# echo "id=$(uuidgen)" >> "$GITHUB_OUTPUT"
|
| 173 |
+
|
| 174 |
+
# - name: Checkout NeMo
|
| 175 |
+
# uses: actions/checkout@v2
|
| 176 |
+
# with:
|
| 177 |
+
# repository: NVIDIA/NeMo
|
| 178 |
+
# path: ${{ github.run_id }}/${{steps.uuid.outputs.id }}/NeMo
|
| 179 |
+
|
| 180 |
+
# - name: Run some checks
|
| 181 |
+
# run: |
|
| 182 |
+
# docker run \
|
| 183 |
+
# --rm \
|
| 184 |
+
# --device=/dev/nvidia0 \
|
| 185 |
+
# --gpus all \
|
| 186 |
+
# --shm-size=8g \
|
| 187 |
+
# --volume $(pwd)/${{ github.run_id }}/${{steps.uuid.outputs.id }}/NeMo:/workspace \
|
| 188 |
+
# --env TRANSFORMERS_OFFLINE=0 \
|
| 189 |
+
# --env HYDRA_FULL_ERROR=1 --env PYTHONUNBUFFERED=1 nemoci.azurecr.io/nemo_container:${{ github.run_id }} bash -c '\
|
| 190 |
+
# # PyTorch Lightning version
|
| 191 |
+
# python -c "import lightning.pytorch; print(lightning.pytorch.__version__)"
|
| 192 |
+
|
| 193 |
+
# # PyTorch Lightning DDP Checks
|
| 194 |
+
# CUDA_VISIBLE_DEVICES="0,1" python "tests/core_ptl/check_for_ranks.py"
|
| 195 |
+
|
| 196 |
+
# # Basic Import Checks
|
| 197 |
+
# python tests/core_ptl/check_imports.py --domain asr
|
| 198 |
+
# python tests/core_ptl/check_imports.py --domain nlp
|
| 199 |
+
# python tests/core_ptl/check_imports.py --domain tts
|
| 200 |
+
# '
|
| 201 |
+
|
| 202 |
+
# L0_Setup_Test_Data_And_Models:
|
| 203 |
+
# needs: [pre-flight, cicd-test-container-build, cicd-wait-in-queue]
|
| 204 |
+
# runs-on: self-hosted-azure
|
| 205 |
+
# if: |
|
| 206 |
+
# needs.pre-flight.outputs.test_to_run != '[]'
|
| 207 |
+
# && (
|
| 208 |
+
# success()
|
| 209 |
+
# || (
|
| 210 |
+
# needs.cicd-wait-in-queue.result == 'skipped'
|
| 211 |
+
# && needs.pre-flight.outputs.is_ci_workload == 'true'
|
| 212 |
+
# )
|
| 213 |
+
# )
|
| 214 |
+
# && !cancelled()
|
| 215 |
+
# steps:
|
| 216 |
+
# - name: Checkout
|
| 217 |
+
# uses: actions/checkout@v4
|
| 218 |
+
# with:
|
| 219 |
+
# path: ${{ github.run_id }}
|
| 220 |
+
|
| 221 |
+
# - name: main
|
| 222 |
+
# uses: NVIDIA/NeMo/.github/actions/test-template@main
|
| 223 |
+
# with:
|
| 224 |
+
# runner: ${{ runner.name }}
|
| 225 |
+
# script: L0_Setup_Test_Data_And_Models
|
| 226 |
+
# tests_to_run: '["L0_Setup_Test_Data_And_Models"]'
|
| 227 |
+
|
| 228 |
+
# cicd-main-unit-tests:
|
| 229 |
+
# needs: [pre-flight, cicd-test-container-build]
|
| 230 |
+
# uses: ./.github/workflows/cicd-main-unit-tests.yml
|
| 231 |
+
# if: |
|
| 232 |
+
# needs.pre-flight.outputs.test_to_run != '[]'
|
| 233 |
+
# && (
|
| 234 |
+
# success()
|
| 235 |
+
# || (
|
| 236 |
+
# needs.cicd-wait-in-queue.result == 'skipped'
|
| 237 |
+
# && needs.pre-flight.outputs.is_ci_workload == 'true'
|
| 238 |
+
# )
|
| 239 |
+
# )
|
| 240 |
+
# && !cancelled()
|
| 241 |
+
# with:
|
| 242 |
+
# test_to_run: ${{ needs.pre-flight.outputs.test_to_run }}
|
| 243 |
+
|
| 244 |
+
# cicd-main-export-deploy:
|
| 245 |
+
# needs: [pre-flight, cicd-test-container-build, cicd-main-unit-tests]
|
| 246 |
+
# uses: ./.github/workflows/cicd-main-export-deploy.yml
|
| 247 |
+
# if: |
|
| 248 |
+
# (
|
| 249 |
+
# needs.pre-flight.outputs.test_to_run != '[]'
|
| 250 |
+
# && (
|
| 251 |
+
# contains(fromJson(needs.pre-flight.outputs.components_to_run), 'export-deploy')
|
| 252 |
+
# )
|
| 253 |
+
# )
|
| 254 |
+
# && (
|
| 255 |
+
# success()
|
| 256 |
+
# || (
|
| 257 |
+
# needs.cicd-wait-in-queue.result == 'skipped'
|
| 258 |
+
# && needs.pre-flight.outputs.is_ci_workload == 'true'
|
| 259 |
+
# )
|
| 260 |
+
# )
|
| 261 |
+
# && !cancelled()
|
| 262 |
+
# with:
|
| 263 |
+
# test_to_run: ${{ needs.pre-flight.outputs.test_to_run }}
|
| 264 |
+
|
| 265 |
+
# cicd-main-speech:
|
| 266 |
+
# needs: [pre-flight, cicd-test-container-build, cicd-main-unit-tests]
|
| 267 |
+
# uses: ./.github/workflows/cicd-main-speech.yml
|
| 268 |
+
# if: |
|
| 269 |
+
# (
|
| 270 |
+
# needs.pre-flight.outputs.test_to_run != '[]'
|
| 271 |
+
# && (
|
| 272 |
+
# contains(fromJson(needs.pre-flight.outputs.components_to_run), 'speech')
|
| 273 |
+
# )
|
| 274 |
+
# )
|
| 275 |
+
# && (
|
| 276 |
+
# success()
|
| 277 |
+
# || (
|
| 278 |
+
# needs.cicd-wait-in-queue.result == 'skipped'
|
| 279 |
+
# && needs.pre-flight.outputs.is_ci_workload == 'true'
|
| 280 |
+
# )
|
| 281 |
+
# )
|
| 282 |
+
# && !cancelled()
|
| 283 |
+
# with:
|
| 284 |
+
# test_to_run: ${{ needs.pre-flight.outputs.test_to_run }}
|
| 285 |
+
|
| 286 |
+
# cicd-main-automodel:
|
| 287 |
+
# needs: [pre-flight, cicd-test-container-build, cicd-main-unit-tests]
|
| 288 |
+
# uses: ./.github/workflows/cicd-main-automodel.yml
|
| 289 |
+
# if: |
|
| 290 |
+
# (
|
| 291 |
+
# needs.pre-flight.outputs.test_to_run != '[]'
|
| 292 |
+
# && (
|
| 293 |
+
# contains(fromJson(needs.pre-flight.outputs.components_to_run), 'automodel')
|
| 294 |
+
# )
|
| 295 |
+
# )
|
| 296 |
+
# && (
|
| 297 |
+
# success()
|
| 298 |
+
# || (
|
| 299 |
+
# needs.cicd-wait-in-queue.result == 'skipped'
|
| 300 |
+
# && needs.pre-flight.outputs.is_ci_workload == 'true'
|
| 301 |
+
# )
|
| 302 |
+
# )
|
| 303 |
+
# && !cancelled()
|
| 304 |
+
# with:
|
| 305 |
+
# test_to_run: ${{ needs.pre-flight.outputs.test_to_run }}
|
| 306 |
+
|
| 307 |
+
# cicd-main-nemo2:
|
| 308 |
+
# needs: [pre-flight, cicd-test-container-build, cicd-main-unit-tests]
|
| 309 |
+
# uses: ./.github/workflows/cicd-main-nemo2.yml
|
| 310 |
+
# if: |
|
| 311 |
+
# (
|
| 312 |
+
# needs.pre-flight.outputs.test_to_run != '[]'
|
| 313 |
+
# && (
|
| 314 |
+
# contains(fromJson(needs.pre-flight.outputs.components_to_run), 'nemo2')
|
| 315 |
+
# || needs.pre-flight.outputs.components_to_run == '["all"]'
|
| 316 |
+
# )
|
| 317 |
+
# )
|
| 318 |
+
# && (
|
| 319 |
+
# success()
|
| 320 |
+
# || (
|
| 321 |
+
# needs.cicd-wait-in-queue.result == 'skipped'
|
| 322 |
+
# && needs.pre-flight.outputs.is_ci_workload == 'true'
|
| 323 |
+
# )
|
| 324 |
+
# )
|
| 325 |
+
# && !cancelled()
|
| 326 |
+
# with:
|
| 327 |
+
# test_to_run: ${{ needs.pre-flight.outputs.test_to_run }}
|
| 328 |
+
|
| 329 |
+
Nemo_CICD_Test_Debug:
|
| 330 |
+
needs:
|
| 331 |
+
- pre-flight
|
| 332 |
+
- cicd-test-container-build
|
| 333 |
+
# - cicd-import-tests
|
| 334 |
+
# - L0_Setup_Test_Data_And_Models
|
| 335 |
+
# - cicd-main-unit-tests
|
| 336 |
+
# - cicd-main-nemo2
|
| 337 |
+
# - cicd-main-export-deploy
|
| 338 |
+
# - cicd-main-automodel
|
| 339 |
+
# - cicd-main-speech
|
| 340 |
+
if: always()
|
| 341 |
+
runs-on: ubuntu-latest
|
| 342 |
+
permissions: write-all
|
| 343 |
+
steps:
|
| 344 |
+
- name: Checkout
|
| 345 |
+
uses: actions/checkout@v4
|
| 346 |
+
|
| 347 |
+
- name: Get workflow result
|
| 348 |
+
id: result
|
| 349 |
+
env:
|
| 350 |
+
GH_TOKEN: ${{ github.token }}
|
| 351 |
+
RUN_ID: ${{ github.run_id }}
|
| 352 |
+
|
| 353 |
+
run: |
|
| 354 |
+
# Get workflow run details and check job conclusions
|
| 355 |
+
NUM_FAILED=$(gh run view $RUN_ID --json jobs -q '[.jobs[] | select(.conclusion == "failure") | .name] | length')
|
| 356 |
+
NUM_CANCELLED=$(gh run view $RUN_ID --json jobs -q '[.jobs[] | select(.conclusion == "cancelled") | .name] | length')
|
| 357 |
+
|
| 358 |
+
if [[ $NUM_FAILED -eq 0 && $NUM_CANCELLED -eq 0 ]]; then
|
| 359 |
+
RESULT="success"
|
| 360 |
+
else
|
| 361 |
+
RESULT="failure"
|
| 362 |
+
fi
|
| 363 |
+
|
| 364 |
+
# Output the final status
|
| 365 |
+
echo "code=$RESULT" | tee -a $GITHUB_OUTPUT
|
| 366 |
+
|
| 367 |
+
- name: Checkout for GH CLI
|
| 368 |
+
uses: actions/checkout@v4
|
| 369 |
+
|
| 370 |
+
- name: Remove label if not cancelled
|
| 371 |
+
if: ${{ steps.result.outputs.code != 'cancelled' && github.event.label.name == 'Run CICD' && github.event.pull_request.head.repo.full_name == github.repository }}
|
| 372 |
+
env:
|
| 373 |
+
GH_TOKEN: ${{ github.token }}
|
| 374 |
+
PR_NUMBER: ${{ github.event.number }}
|
| 375 |
+
run: gh pr edit "$PR_NUMBER" --remove-label "Run CICD"
|
| 376 |
+
|
| 377 |
+
- name: Pipeline successful, add PR comment
|
| 378 |
+
if: ${{ always() && steps.result.outputs.code == 'success' && github.event_name == 'pull_request' && env.SLACK_WEBHOOK != '' }}
|
| 379 |
+
uses: peter-evans/create-or-update-comment@v4
|
| 380 |
+
env:
|
| 381 |
+
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
| 382 |
+
REPOSITORY: ${{ github.repository }}
|
| 383 |
+
RUN_ID: ${{ github.run_id }}
|
| 384 |
+
with:
|
| 385 |
+
issue-number: ${{ github.event.number }}
|
| 386 |
+
body: |
|
| 387 |
+
[🤖]: Hi @${{ github.event.pull_request.user.login }} 👋,
|
| 388 |
+
|
| 389 |
+
We wanted to let you know that a [CICD pipeline](https://github.com/${{ env.REPOSITORY }}/actions/runs/${{ env.RUN_ID }}) for this PR just finished successfully.
|
| 390 |
+
|
| 391 |
+
So it might be time to merge this PR or get some approvals.
|
| 392 |
+
|
| 393 |
+
Due to a major CI change, merges are currently handled by the automation team.
|
| 394 |
+
We will reach out to you quickly to merge this PR, but you can always reach us with the following handles:
|
| 395 |
+
|
| 396 |
+
//cc @chtruong814 @ko3n1g @pablo-garay @thomasdhc
|
| 397 |
+
|
| 398 |
+
- name: "Pipeline not successful and not cancelled: Send Slack alert & create step summary"
|
| 399 |
+
if: ${{ always() && steps.result.outputs.code == 'failure' && env.SLACK_WEBHOOK != '' }}
|
| 400 |
+
env:
|
| 401 |
+
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
| 402 |
+
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
| 403 |
+
REPOSITORY: ${{ github.repository }}
|
| 404 |
+
RUN_ID: ${{ github.run_id }}
|
| 405 |
+
PR_NUMBER: ${{ github.event.number }}
|
| 406 |
+
SERVER_URL: ${{ github.server_url }}
|
| 407 |
+
run: |
|
| 408 |
+
set -x
|
| 409 |
+
pip install PyGithub
|
| 410 |
+
export BRANCH_NAME=${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}
|
| 411 |
+
|
| 412 |
+
python .github/scripts/notify.py
|
| 413 |
+
|
| 414 |
+
- name: Exit
|
| 415 |
+
if: ${{ always() }}
|
| 416 |
+
env:
|
| 417 |
+
RESULT: ${{ steps.result.outputs.code }}
|
| 418 |
+
run: |
|
| 419 |
+
if [ $RESULT == "success" ]; then
|
| 420 |
+
exit 0
|
| 421 |
+
else
|
| 422 |
+
exit 1
|
| 423 |
+
fi
|
| 424 |
+
|
| 425 |
+
Coverage:
|
| 426 |
+
runs-on: ubuntu-latest
|
| 427 |
+
needs: [Nemo_CICD_Test_Debug]
|
| 428 |
+
strategy:
|
| 429 |
+
matrix:
|
| 430 |
+
flag: [unit-test, e2e]
|
| 431 |
+
if: |
|
| 432 |
+
(
|
| 433 |
+
success()
|
| 434 |
+
|| needs.Nemo_CICD_Test_Debug.result == 'success'
|
| 435 |
+
)
|
| 436 |
+
&& !cancelled()
|
| 437 |
+
steps:
|
| 438 |
+
- name: Checkout
|
| 439 |
+
uses: actions/checkout@v4
|
| 440 |
+
|
| 441 |
+
- name: Download coverage reports of current branch
|
| 442 |
+
uses: actions/download-artifact@v4
|
| 443 |
+
with:
|
| 444 |
+
pattern: coverage-${{ matrix.flag }}-*
|
| 445 |
+
|
| 446 |
+
- name: Get total coverage of current branch
|
| 447 |
+
shell: bash -x -e -u -o pipefail {0}
|
| 448 |
+
if: always()
|
| 449 |
+
run: |
|
| 450 |
+
pip install coverage
|
| 451 |
+
|
| 452 |
+
ls -al .
|
| 453 |
+
ls -al coverage-*/
|
| 454 |
+
coverage combine --keep $(ls coverage-*/.coverage)
|
| 455 |
+
coverage report -i
|
| 456 |
+
rm -rf coverage-*
|
| 457 |
+
ls -al
|
| 458 |
+
|
| 459 |
+
- name: Upload coverage reports to Codecov
|
| 460 |
+
uses: codecov/codecov-action@v5
|
| 461 |
+
with:
|
| 462 |
+
token: ${{ secrets.CODECOV_TOKEN }}
|
| 463 |
+
verbose: true
|
| 464 |
+
flags: ${{ matrix.flag }}
|
| 465 |
+
|
| 466 |
+
- name: Upload artifacts
|
| 467 |
+
uses: actions/upload-artifact@v4
|
| 468 |
+
with:
|
| 469 |
+
name: coverage-${{ matrix.flag }}-aggregated
|
| 470 |
+
path: |
|
| 471 |
+
.coverage
|
| 472 |
+
include-hidden-files: true
|
.github/workflows/cicd-main-unit-tests.yml
ADDED
|
@@ -0,0 +1,212 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2025, NVIDIA CORPORATION.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
name: NeMo Unit Tests
|
| 15 |
+
on:
|
| 16 |
+
workflow_call:
|
| 17 |
+
inputs:
|
| 18 |
+
test_to_run:
|
| 19 |
+
required: true
|
| 20 |
+
type: string
|
| 21 |
+
|
| 22 |
+
jobs:
|
| 23 |
+
collections-common-tests:
|
| 24 |
+
strategy:
|
| 25 |
+
fail-fast: false
|
| 26 |
+
matrix:
|
| 27 |
+
include:
|
| 28 |
+
- script: L0_Unit_Tests_GPU_Common
|
| 29 |
+
runner: self-hosted-azure-gpus-1
|
| 30 |
+
- script: L0_Unit_Tests_CPU_Common
|
| 31 |
+
runner: self-hosted-azure-cpu
|
| 32 |
+
cpu-only: true
|
| 33 |
+
runs-on: ${{ matrix.runner }}
|
| 34 |
+
name: ${{ matrix.script }}
|
| 35 |
+
steps:
|
| 36 |
+
- name: Checkout
|
| 37 |
+
uses: actions/checkout@v4
|
| 38 |
+
with:
|
| 39 |
+
path: ${{ github.run_id }}
|
| 40 |
+
- name: main
|
| 41 |
+
uses: NVIDIA/NeMo/.github/actions/test-template@main
|
| 42 |
+
with:
|
| 43 |
+
runner: ${{ runner.name }}
|
| 44 |
+
script: ${{ matrix.script }}
|
| 45 |
+
is_unit_test: true
|
| 46 |
+
tests_to_run: ${{ inputs.test_to_run }}
|
| 47 |
+
cpu-only: ${{ matrix.cpu-only || false }}
|
| 48 |
+
|
| 49 |
+
collections-llm-tests:
|
| 50 |
+
strategy:
|
| 51 |
+
fail-fast: false
|
| 52 |
+
matrix:
|
| 53 |
+
include:
|
| 54 |
+
- script: L0_Unit_Tests_GPU_LLM
|
| 55 |
+
runner: self-hosted-azure-gpus-1
|
| 56 |
+
- script: L0_Unit_Tests_CPU_LLM
|
| 57 |
+
runner: self-hosted-azure-cpu
|
| 58 |
+
cpu-only: true
|
| 59 |
+
runs-on: ${{ matrix.runner }}
|
| 60 |
+
name: ${{ matrix.script }}
|
| 61 |
+
steps:
|
| 62 |
+
- name: Checkout
|
| 63 |
+
uses: actions/checkout@v4
|
| 64 |
+
with:
|
| 65 |
+
path: ${{ github.run_id }}
|
| 66 |
+
- name: main
|
| 67 |
+
uses: NVIDIA/NeMo/.github/actions/test-template@main
|
| 68 |
+
with:
|
| 69 |
+
runner: ${{ runner.name }}
|
| 70 |
+
script: ${{ matrix.script }}
|
| 71 |
+
is_unit_test: true
|
| 72 |
+
tests_to_run: ${{ inputs.test_to_run }}
|
| 73 |
+
cpu-only: ${{ matrix.cpu-only || false }}
|
| 74 |
+
is_optional: ${{ matrix.is-optional || false }}
|
| 75 |
+
|
| 76 |
+
collections-multimodal-tests:
|
| 77 |
+
strategy:
|
| 78 |
+
fail-fast: false
|
| 79 |
+
matrix:
|
| 80 |
+
include:
|
| 81 |
+
- script: L0_Unit_Tests_GPU_Multimodal
|
| 82 |
+
runner: self-hosted-azure-gpus-1
|
| 83 |
+
- script: L0_Unit_Tests_CPU_Multimodal
|
| 84 |
+
runner: self-hosted-azure-cpu
|
| 85 |
+
cpu-only: true
|
| 86 |
+
runs-on: ${{ matrix.runner }}
|
| 87 |
+
name: ${{ matrix.script }}
|
| 88 |
+
steps:
|
| 89 |
+
- name: Checkout
|
| 90 |
+
uses: actions/checkout@v4
|
| 91 |
+
with:
|
| 92 |
+
path: ${{ github.run_id }}
|
| 93 |
+
- name: main
|
| 94 |
+
uses: NVIDIA/NeMo/.github/actions/test-template@main
|
| 95 |
+
with:
|
| 96 |
+
runner: ${{ runner.name }}
|
| 97 |
+
script: ${{ matrix.script }}
|
| 98 |
+
is_unit_test: true
|
| 99 |
+
tests_to_run: ${{ inputs.test_to_run }}
|
| 100 |
+
cpu-only: ${{ matrix.cpu-only || false }}
|
| 101 |
+
is_optional: ${{ matrix.is-optional || false }}
|
| 102 |
+
collections-vlm-tests:
|
| 103 |
+
strategy:
|
| 104 |
+
fail-fast: false
|
| 105 |
+
matrix:
|
| 106 |
+
include:
|
| 107 |
+
- script: L0_Unit_Tests_GPU_VLM
|
| 108 |
+
runner: self-hosted-azure-gpus-1
|
| 109 |
+
- script: L0_Unit_Tests_CPU_VLM
|
| 110 |
+
runner: self-hosted-azure-cpu
|
| 111 |
+
cpu-only: true
|
| 112 |
+
runs-on: ${{ matrix.runner }}
|
| 113 |
+
name: ${{ matrix.script }}
|
| 114 |
+
steps:
|
| 115 |
+
- name: Checkout
|
| 116 |
+
uses: actions/checkout@v4
|
| 117 |
+
with:
|
| 118 |
+
path: ${{ github.run_id }}
|
| 119 |
+
- name: main
|
| 120 |
+
uses: NVIDIA/NeMo/.github/actions/test-template@main
|
| 121 |
+
with:
|
| 122 |
+
runner: ${{ runner.name }}
|
| 123 |
+
script: ${{ matrix.script }}
|
| 124 |
+
is_unit_test: true
|
| 125 |
+
tests_to_run: ${{ inputs.test_to_run }}
|
| 126 |
+
cpu-only: ${{ matrix.cpu-only || false }}
|
| 127 |
+
is_optional: ${{ matrix.is-optional || false }}
|
| 128 |
+
|
| 129 |
+
core-tests:
|
| 130 |
+
strategy:
|
| 131 |
+
fail-fast: false
|
| 132 |
+
matrix:
|
| 133 |
+
include:
|
| 134 |
+
- script: L0_Unit_Tests_GPU_Core
|
| 135 |
+
runner: self-hosted-azure-gpus-1
|
| 136 |
+
- script: L0_Unit_Tests_CPU_Core
|
| 137 |
+
runner: self-hosted-azure-cpu
|
| 138 |
+
cpu-only: true
|
| 139 |
+
- script: L0_Unit_Tests_GPU_Hydra
|
| 140 |
+
runner: self-hosted-azure-gpus-1
|
| 141 |
+
- script: L0_Unit_Tests_CPU_Hydra
|
| 142 |
+
runner: self-hosted-azure-cpu
|
| 143 |
+
cpu-only: true
|
| 144 |
+
runs-on: ${{ matrix.runner }}
|
| 145 |
+
name: ${{ matrix.script }}
|
| 146 |
+
steps:
|
| 147 |
+
- name: Checkout
|
| 148 |
+
uses: actions/checkout@v4
|
| 149 |
+
with:
|
| 150 |
+
path: ${{ github.run_id }}
|
| 151 |
+
- name: main
|
| 152 |
+
uses: NVIDIA/NeMo/.github/actions/test-template@main
|
| 153 |
+
with:
|
| 154 |
+
runner: ${{ runner.name }}
|
| 155 |
+
script: ${{ matrix.script }}
|
| 156 |
+
is_unit_test: true
|
| 157 |
+
tests_to_run: ${{ inputs.test_to_run }}
|
| 158 |
+
cpu-only: ${{ matrix.cpu-only || false }}
|
| 159 |
+
|
| 160 |
+
lightning-tests:
|
| 161 |
+
strategy:
|
| 162 |
+
fail-fast: false
|
| 163 |
+
matrix:
|
| 164 |
+
include:
|
| 165 |
+
- script: L0_Unit_Tests_GPU_Lightning
|
| 166 |
+
runner: self-hosted-azure
|
| 167 |
+
- script: L0_Unit_Tests_CPU_Lightning
|
| 168 |
+
runner: self-hosted-azure-cpu
|
| 169 |
+
cpu-only: true
|
| 170 |
+
runs-on: ${{ matrix.runner }}
|
| 171 |
+
name: ${{ matrix.script }}
|
| 172 |
+
steps:
|
| 173 |
+
- name: Checkout
|
| 174 |
+
uses: actions/checkout@v4
|
| 175 |
+
with:
|
| 176 |
+
path: ${{ github.run_id }}
|
| 177 |
+
- name: main
|
| 178 |
+
uses: NVIDIA/NeMo/.github/actions/test-template@main
|
| 179 |
+
with:
|
| 180 |
+
runner: ${{ runner.name }}
|
| 181 |
+
script: ${{ matrix.script }}
|
| 182 |
+
is_unit_test: true
|
| 183 |
+
tests_to_run: ${{ inputs.test_to_run }}
|
| 184 |
+
cpu-only: ${{ matrix.cpu-only || false }}
|
| 185 |
+
is_optional: ${{ matrix.is-optional || false }}
|
| 186 |
+
|
| 187 |
+
other-tests:
|
| 188 |
+
strategy:
|
| 189 |
+
fail-fast: false
|
| 190 |
+
matrix:
|
| 191 |
+
include:
|
| 192 |
+
- script: L0_Unit_Tests_GPU_Others
|
| 193 |
+
runner: self-hosted-azure-gpus-1
|
| 194 |
+
- script: L0_Unit_Tests_CPU_Others
|
| 195 |
+
runner: self-hosted-azure-cpu
|
| 196 |
+
cpu-only: true
|
| 197 |
+
runs-on: ${{ matrix.runner }}
|
| 198 |
+
name: ${{ matrix.script }}
|
| 199 |
+
steps:
|
| 200 |
+
- name: Checkout
|
| 201 |
+
uses: actions/checkout@v4
|
| 202 |
+
with:
|
| 203 |
+
path: ${{ github.run_id }}
|
| 204 |
+
- name: main
|
| 205 |
+
uses: NVIDIA/NeMo/.github/actions/test-template@main
|
| 206 |
+
with:
|
| 207 |
+
runner: ${{ runner.name }}
|
| 208 |
+
script: ${{ matrix.script }}
|
| 209 |
+
is_unit_test: true
|
| 210 |
+
tests_to_run: ${{ inputs.test_to_run }}
|
| 211 |
+
cpu-only: ${{ matrix.cpu-only || false }}
|
| 212 |
+
is_optional: ${{ matrix.is-optional || false }}
|
.github/workflows/cicd-main.yml
ADDED
|
@@ -0,0 +1,450 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2025, NVIDIA CORPORATION.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
name: CICD NeMo
|
| 15 |
+
on:
|
| 16 |
+
schedule:
|
| 17 |
+
- cron: 0 0 * * *
|
| 18 |
+
pull_request:
|
| 19 |
+
branches:
|
| 20 |
+
- main
|
| 21 |
+
- r**
|
| 22 |
+
- weekly-bump*
|
| 23 |
+
types: [labeled]
|
| 24 |
+
workflow_dispatch:
|
| 25 |
+
inputs:
|
| 26 |
+
test_to_run:
|
| 27 |
+
required: false
|
| 28 |
+
default: all
|
| 29 |
+
type: string
|
| 30 |
+
description: Comma-separated list of tests to run. Use "all" to run the full test suite.
|
| 31 |
+
|
| 32 |
+
concurrency:
|
| 33 |
+
# group: ${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_id || github.event.pull_request.number || github.ref }}-${{ github.event_name }}
|
| 34 |
+
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-${{ github.event.label.name || 'main' }}-${{ github.event_name }}
|
| 35 |
+
cancel-in-progress: true
|
| 36 |
+
|
| 37 |
+
jobs:
|
| 38 |
+
pre-flight:
|
| 39 |
+
runs-on: ubuntu-latest
|
| 40 |
+
outputs:
|
| 41 |
+
test_to_run: ${{ steps.test_to_run.outputs.main }}
|
| 42 |
+
is_ci_workload: ${{ steps.is_ci_workload.outputs.main }}
|
| 43 |
+
no_fail_fast: ${{ steps.no_fail_fast.outputs.main }}
|
| 44 |
+
components_to_run: ${{ steps.components_to_run.outputs.main }}
|
| 45 |
+
env:
|
| 46 |
+
TESTS_TO_RUN: ${{ inputs.test_to_run }}
|
| 47 |
+
EVENT_NAME: ${{ github.event_name }}
|
| 48 |
+
HAS_LABEL: ${{ github.event.label.name == 'Run CICD' }}
|
| 49 |
+
steps:
|
| 50 |
+
- name: Checkout branch
|
| 51 |
+
uses: actions/checkout@v4
|
| 52 |
+
with:
|
| 53 |
+
fetch-depth: 0
|
| 54 |
+
|
| 55 |
+
- name: Select components to run
|
| 56 |
+
id: components_to_run
|
| 57 |
+
run: |
|
| 58 |
+
pip install -U pip
|
| 59 |
+
pip install git-python
|
| 60 |
+
|
| 61 |
+
if [[ "$EVENT_NAME" == "pull_request" ]]; then
|
| 62 |
+
python .github/scripts/components_to_run.py --source-sha ${{ github.event.pull_request.head.sha }} --target-sha ${{ github.event.pull_request.base.sha }}
|
| 63 |
+
else
|
| 64 |
+
echo '["nemo2", "export-deploy", "speech"]' | tee -a test_modules.json
|
| 65 |
+
fi
|
| 66 |
+
|
| 67 |
+
components_to_run=$(cat test_modules.json)
|
| 68 |
+
|
| 69 |
+
echo "main=${components_to_run}" | tee -a "$GITHUB_OUTPUT"
|
| 70 |
+
|
| 71 |
+
- name: Select tests to run
|
| 72 |
+
id: test_to_run
|
| 73 |
+
run: |
|
| 74 |
+
# For manual dispatch, use the provided `test_to_run` input as-is (it defaults to `all`)
|
| 75 |
+
if [[ "$EVENT_NAME" == "workflow_dispatch" ]]; then
|
| 76 |
+
TESTS_TO_RUN=$TESTS_TO_RUN
|
| 77 |
+
|
| 78 |
+
# For a PR event triggered by the `Run CICD` label, run all tests
|
| 79 |
+
elif [[ "$EVENT_NAME" == "pull_request" && "$HAS_LABEL" == "true" ]]; then
|
| 80 |
+
TESTS_TO_RUN=all
|
| 81 |
+
|
| 82 |
+
# For incorrectly labeled PR, run no tests
|
| 83 |
+
elif [[ "$EVENT_NAME" == "pull_request" && "$HAS_LABEL" != "true" ]]; then
|
| 84 |
+
TESTS_TO_RUN=""
|
| 85 |
+
|
| 86 |
+
# For push and schedule events, run all tests. This is so that we can generate coverage
|
| 87 |
+
# on branch `main`.
|
| 88 |
+
elif [[ "$EVENT_NAME" == "push" || "$EVENT_NAME" == "schedule" ]]; then
|
| 89 |
+
TESTS_TO_RUN=all
|
| 90 |
+
|
| 91 |
+
else
|
| 92 |
+
echo "Unsupported event_name $EVENT_NAME provided".
|
| 93 |
+
exit 1
|
| 94 |
+
fi
|
| 95 |
+
|
| 96 |
+
parsed_string=$(echo "$TESTS_TO_RUN" | jq -c --raw-input 'split(",")')
|
| 97 |
+
echo "main=${parsed_string}" | tee -a "$GITHUB_OUTPUT"
|
| 98 |
+
|
| 99 |
+
- name: Check if this is a CI workload
|
| 100 |
+
shell: bash
|
| 101 |
+
id: is_ci_workload
|
| 102 |
+
run: |
|
| 103 |
+
branch_name=${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}
|
| 104 |
+
|
| 105 |
+
if [[ "$branch_name" =~ ^bump-ci-container || "$EVENT_NAME" == "schedule" ]]; then
|
| 106 |
+
is_ci_workload=true
|
| 107 |
+
echo "main=true" | tee -a "$GITHUB_OUTPUT"
|
| 108 |
+
else
|
| 109 |
+
is_ci_workload=false
|
| 110 |
+
fi
|
| 111 |
+
|
| 112 |
+
echo "main=$is_ci_workload" | tee -a "$GITHUB_OUTPUT"
|
| 113 |
+
|
| 114 |
+
- name: Check if no-fail-fast is set
|
| 115 |
+
shell: bash
|
| 116 |
+
id: no_fail_fast
|
| 117 |
+
env:
|
| 118 |
+
HAS_FAIL_FAST_LABEL: ${{ contains(github.event.pull_request.labels.*.name, 'no-fail-fast') }}
|
| 119 |
+
run: |
|
| 120 |
+
if [[ "$HAS_FAIL_FAST_LABEL" == "true" || "$EVENT_NAME" == "schedule" ]]; then
|
| 121 |
+
no_fail_fast=true
|
| 122 |
+
else
|
| 123 |
+
no_fail_fast=false
|
| 124 |
+
fi
|
| 125 |
+
|
| 126 |
+
echo "main=$no_fail_fast" | tee -a "$GITHUB_OUTPUT"
|
| 127 |
+
|
| 128 |
+
code-linting:
|
| 129 |
+
if: needs.pre-flight.outputs.test_to_run != '[]'
|
| 130 |
+
needs: [pre-flight]
|
| 131 |
+
uses: ./.github/workflows/code-linting.yml
|
| 132 |
+
|
| 133 |
+
cicd-wait-in-queue:
|
| 134 |
+
needs: [pre-flight, code-linting]
|
| 135 |
+
runs-on: ubuntu-latest
|
| 136 |
+
environment: test
|
| 137 |
+
if: |
|
| 138 |
+
needs.pre-flight.outputs.test_to_run != '[]'
|
| 139 |
+
&& needs.pre-flight.outputs.components_to_run != '[]'
|
| 140 |
+
&& needs.pre-flight.outputs.is_ci_workload == 'false'
|
| 141 |
+
steps:
|
| 142 |
+
- name: Running CI tests
|
| 143 |
+
run: |
|
| 144 |
+
echo "Running CI tests"
|
| 145 |
+
|
| 146 |
+
cicd-test-container-build:
|
| 147 |
+
uses: ./.github/workflows/_build_container.yml
|
| 148 |
+
needs: [pre-flight, code-linting, cicd-wait-in-queue]
|
| 149 |
+
if: |
|
| 150 |
+
needs.pre-flight.outputs.test_to_run != '[]'
|
| 151 |
+
&& needs.pre-flight.outputs.components_to_run != '[]'
|
| 152 |
+
&& (
|
| 153 |
+
success()
|
| 154 |
+
|| (
|
| 155 |
+
needs.cicd-wait-in-queue.result == 'skipped'
|
| 156 |
+
&& needs.pre-flight.outputs.is_ci_workload == 'true'
|
| 157 |
+
)
|
| 158 |
+
)
|
| 159 |
+
&& !cancelled()
|
| 160 |
+
with:
|
| 161 |
+
image-name: nemo_container
|
| 162 |
+
dockerfile: docker/Dockerfile.ci
|
| 163 |
+
|
| 164 |
+
cicd-import-tests:
|
| 165 |
+
if: |
|
| 166 |
+
needs.pre-flight.outputs.test_to_run != '[]'
|
| 167 |
+
&& needs.pre-flight.outputs.components_to_run != '[]'
|
| 168 |
+
&& (
|
| 169 |
+
success()
|
| 170 |
+
|| (
|
| 171 |
+
needs.cicd-wait-in-queue.result == 'skipped'
|
| 172 |
+
&& needs.pre-flight.outputs.is_ci_workload == 'true'
|
| 173 |
+
)
|
| 174 |
+
)
|
| 175 |
+
&& !cancelled()
|
| 176 |
+
needs: [cicd-test-container-build, pre-flight]
|
| 177 |
+
runs-on: self-hosted-azure-gpus-1
|
| 178 |
+
steps:
|
| 179 |
+
- name: Create UUID
|
| 180 |
+
id: uuid
|
| 181 |
+
run: |
|
| 182 |
+
echo "id=$(uuidgen)" >> "$GITHUB_OUTPUT"
|
| 183 |
+
|
| 184 |
+
- name: Checkout NeMo
|
| 185 |
+
uses: actions/checkout@v4
|
| 186 |
+
with:
|
| 187 |
+
path: ${{ github.run_id }}/${{steps.uuid.outputs.id }}/NeMo
|
| 188 |
+
|
| 189 |
+
- name: Run some checks
|
| 190 |
+
run: |
|
| 191 |
+
docker run \
|
| 192 |
+
--rm \
|
| 193 |
+
--device=/dev/nvidia0 \
|
| 194 |
+
--gpus all \
|
| 195 |
+
--shm-size=8g \
|
| 196 |
+
--volume $(pwd)/${{ github.run_id }}/${{steps.uuid.outputs.id }}/NeMo:/workspace \
|
| 197 |
+
--env TRANSFORMERS_OFFLINE=0 \
|
| 198 |
+
--env HYDRA_FULL_ERROR=1 --env PYTHONUNBUFFERED=1 nemoci.azurecr.io/nemo_container:${{ github.run_id }} bash -c '\
|
| 199 |
+
# PyTorch Lightning version
|
| 200 |
+
python -c "import lightning.pytorch; print(lightning.pytorch.__version__)"
|
| 201 |
+
|
| 202 |
+
# PyTorch Lightning DDP Checks
|
| 203 |
+
CUDA_VISIBLE_DEVICES="0,1" python "tests/core_ptl/check_for_ranks.py"
|
| 204 |
+
|
| 205 |
+
# Basic Import Checks
|
| 206 |
+
python tests/core_ptl/check_imports.py --domain asr
|
| 207 |
+
python tests/core_ptl/check_imports.py --domain tts
|
| 208 |
+
'
|
| 209 |
+
|
| 210 |
+
L0_Setup_Test_Data_And_Models:
|
| 211 |
+
needs: [pre-flight, cicd-test-container-build, cicd-wait-in-queue]
|
| 212 |
+
runs-on: self-hosted-azure
|
| 213 |
+
if: |
|
| 214 |
+
needs.pre-flight.outputs.test_to_run != '[]'
|
| 215 |
+
&& needs.pre-flight.outputs.components_to_run != '[]'
|
| 216 |
+
&& (
|
| 217 |
+
success()
|
| 218 |
+
|| (
|
| 219 |
+
needs.cicd-wait-in-queue.result == 'skipped'
|
| 220 |
+
&& needs.pre-flight.outputs.is_ci_workload == 'true'
|
| 221 |
+
)
|
| 222 |
+
)
|
| 223 |
+
&& !cancelled()
|
| 224 |
+
steps:
|
| 225 |
+
- name: Checkout
|
| 226 |
+
uses: actions/checkout@v4
|
| 227 |
+
with:
|
| 228 |
+
path: ${{ github.run_id }}
|
| 229 |
+
|
| 230 |
+
- name: main
|
| 231 |
+
uses: NVIDIA/NeMo/.github/actions/test-template@main
|
| 232 |
+
with:
|
| 233 |
+
runner: ${{ runner.name }}
|
| 234 |
+
script: L0_Setup_Test_Data_And_Models
|
| 235 |
+
tests_to_run: '["L0_Setup_Test_Data_And_Models"]'
|
| 236 |
+
|
| 237 |
+
cicd-main-unit-tests:
|
| 238 |
+
needs: [pre-flight, cicd-test-container-build]
|
| 239 |
+
uses: ./.github/workflows/cicd-main-unit-tests.yml
|
| 240 |
+
if: |
|
| 241 |
+
needs.pre-flight.outputs.test_to_run != '[]'
|
| 242 |
+
&& needs.pre-flight.outputs.components_to_run != '[]'
|
| 243 |
+
&& (
|
| 244 |
+
success()
|
| 245 |
+
|| (
|
| 246 |
+
needs.cicd-wait-in-queue.result == 'skipped'
|
| 247 |
+
&& needs.pre-flight.outputs.is_ci_workload == 'true'
|
| 248 |
+
)
|
| 249 |
+
)
|
| 250 |
+
&& !cancelled()
|
| 251 |
+
with:
|
| 252 |
+
test_to_run: ${{ needs.pre-flight.outputs.test_to_run }}
|
| 253 |
+
|
| 254 |
+
cicd-main-speech:
|
| 255 |
+
needs: [pre-flight, cicd-test-container-build, cicd-main-unit-tests]
|
| 256 |
+
uses: ./.github/workflows/cicd-main-speech.yml
|
| 257 |
+
if: |
|
| 258 |
+
(
|
| 259 |
+
needs.pre-flight.outputs.test_to_run != '[]'
|
| 260 |
+
&& (
|
| 261 |
+
contains(fromJson(needs.pre-flight.outputs.components_to_run), 'speech')
|
| 262 |
+
)
|
| 263 |
+
)
|
| 264 |
+
&& (
|
| 265 |
+
success()
|
| 266 |
+
|| (
|
| 267 |
+
needs.cicd-wait-in-queue.result == 'skipped'
|
| 268 |
+
&& needs.pre-flight.outputs.is_ci_workload == 'true'
|
| 269 |
+
)
|
| 270 |
+
)
|
| 271 |
+
&& !cancelled()
|
| 272 |
+
with:
|
| 273 |
+
test_to_run: ${{ needs.pre-flight.outputs.test_to_run }}
|
| 274 |
+
|
| 275 |
+
cicd-main-nemo2:
|
| 276 |
+
needs: [pre-flight, cicd-test-container-build, cicd-main-unit-tests]
|
| 277 |
+
uses: ./.github/workflows/cicd-main-nemo2.yml
|
| 278 |
+
if: |
|
| 279 |
+
(
|
| 280 |
+
needs.pre-flight.outputs.test_to_run != '[]'
|
| 281 |
+
&& (
|
| 282 |
+
contains(fromJson(needs.pre-flight.outputs.components_to_run), 'nemo2')
|
| 283 |
+
|| needs.pre-flight.outputs.components_to_run == '["all"]'
|
| 284 |
+
)
|
| 285 |
+
)
|
| 286 |
+
&& (
|
| 287 |
+
success()
|
| 288 |
+
|| (
|
| 289 |
+
needs.cicd-wait-in-queue.result == 'skipped'
|
| 290 |
+
&& needs.pre-flight.outputs.is_ci_workload == 'true'
|
| 291 |
+
)
|
| 292 |
+
)
|
| 293 |
+
&& !cancelled()
|
| 294 |
+
with:
|
| 295 |
+
test_to_run: ${{ needs.pre-flight.outputs.test_to_run }}
|
| 296 |
+
|
| 297 |
+
Nemo_CICD_Test:
|
| 298 |
+
needs:
|
| 299 |
+
- pre-flight
|
| 300 |
+
- cicd-test-container-build
|
| 301 |
+
- cicd-import-tests
|
| 302 |
+
- L0_Setup_Test_Data_And_Models
|
| 303 |
+
- cicd-main-unit-tests
|
| 304 |
+
- cicd-main-nemo2
|
| 305 |
+
- cicd-main-speech
|
| 306 |
+
if: always()
|
| 307 |
+
runs-on: ubuntu-latest
|
| 308 |
+
permissions: write-all
|
| 309 |
+
steps:
|
| 310 |
+
- name: Checkout
|
| 311 |
+
uses: actions/checkout@v4
|
| 312 |
+
|
| 313 |
+
- name: Get workflow result
|
| 314 |
+
id: result
|
| 315 |
+
env:
|
| 316 |
+
GH_TOKEN: ${{ github.token }}
|
| 317 |
+
RUN_ID: ${{ github.run_id }}
|
| 318 |
+
HAS_LABEL: ${{ github.event.label.name == 'Run CICD' }}
|
| 319 |
+
IS_SCHEDULED: ${{ github.event_name == 'schedule' }}
|
| 320 |
+
run: |
|
| 321 |
+
# Get workflow run details and check job conclusions
|
| 322 |
+
LATEST_ATTEMPT=$(gh run view $RUN_ID --json jobs -q '[.jobs[] | select(.conclusion != null) | .conclusion] | last')
|
| 323 |
+
NUM_FAILED=$(gh run view $RUN_ID --json jobs -q '[.jobs[] | select(.conclusion == "failure") | .name] | length')
|
| 324 |
+
NUM_CANCELLED=$(gh run view $RUN_ID --json jobs -q '[.jobs[] | select(.conclusion == "cancelled") | .name] | length')
|
| 325 |
+
|
| 326 |
+
if [[ $NUM_FAILED -eq 0 && $NUM_CANCELLED -eq 0 && ("$HAS_LABEL" == "true" || "$IS_SCHEDULED" == "true") ]]; then
|
| 327 |
+
RESULT="success"
|
| 328 |
+
elif [[ $NUM_CANCELLED -gt 0 ]]; then
|
| 329 |
+
RESULT="cancelled"
|
| 330 |
+
else
|
| 331 |
+
RESULT="failure"
|
| 332 |
+
fi
|
| 333 |
+
|
| 334 |
+
# Output the final status
|
| 335 |
+
echo "code=$RESULT" | tee -a $GITHUB_OUTPUT
|
| 336 |
+
|
| 337 |
+
- name: Checkout for GH CLI
|
| 338 |
+
uses: actions/checkout@v4
|
| 339 |
+
|
| 340 |
+
- name: Remove label if not cancelled
|
| 341 |
+
if: |
|
| 342 |
+
steps.result.outputs.code != 'cancelled'
|
| 343 |
+
&& github.event.label.name == 'Run CICD'
|
| 344 |
+
&& github.event.pull_request.head.repo.full_name == github.repository
|
| 345 |
+
env:
|
| 346 |
+
GH_TOKEN: ${{ github.token }}
|
| 347 |
+
PR_NUMBER: ${{ github.event.number }}
|
| 348 |
+
run: gh pr edit "$PR_NUMBER" --remove-label "Run CICD"
|
| 349 |
+
|
| 350 |
+
- name: Pipeline successful, add PR comment
|
| 351 |
+
if: |
|
| 352 |
+
steps.result.outputs.code == 'success'
|
| 353 |
+
&& github.event_name == 'pull_request'
|
| 354 |
+
&& env.SLACK_WEBHOOK != ''
|
| 355 |
+
uses: peter-evans/create-or-update-comment@v4
|
| 356 |
+
env:
|
| 357 |
+
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
| 358 |
+
REPOSITORY: ${{ github.repository }}
|
| 359 |
+
RUN_ID: ${{ github.run_id }}
|
| 360 |
+
with:
|
| 361 |
+
issue-number: ${{ github.event.number }}
|
| 362 |
+
body: |
|
| 363 |
+
[🤖]: Hi @${{ github.event.pull_request.user.login }} 👋,
|
| 364 |
+
|
| 365 |
+
We wanted to let you know that a [CICD pipeline](https://github.com/${{ env.REPOSITORY }}/actions/runs/${{ env.RUN_ID }}) for this PR just finished successfully.
|
| 366 |
+
|
| 367 |
+
So it might be time to merge this PR or get some approvals.
|
| 368 |
+
|
| 369 |
+
//cc @chtruong814 @ko3n1g @pablo-garay @thomasdhc
|
| 370 |
+
|
| 371 |
+
- name: "Pipeline not successful and not cancelled: Send Slack alert & create step summary"
|
| 372 |
+
if: |
|
| 373 |
+
steps.result.outputs.code == 'failure'
|
| 374 |
+
&& github.event.label.name == 'Run CICD'
|
| 375 |
+
&& env.SLACK_WEBHOOK != ''
|
| 376 |
+
env:
|
| 377 |
+
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
| 378 |
+
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
| 379 |
+
REPOSITORY: ${{ github.repository }}
|
| 380 |
+
RUN_ID: ${{ github.run_id }}
|
| 381 |
+
PR_NUMBER: ${{ github.event.number }}
|
| 382 |
+
SERVER_URL: ${{ github.server_url }}
|
| 383 |
+
run: |
|
| 384 |
+
set -x
|
| 385 |
+
pip install PyGithub
|
| 386 |
+
export BRANCH_NAME=${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}
|
| 387 |
+
|
| 388 |
+
python .github/scripts/notify.py
|
| 389 |
+
|
| 390 |
+
- name: Exit
|
| 391 |
+
if: ${{ always() }}
|
| 392 |
+
env:
|
| 393 |
+
RESULT: ${{ steps.result.outputs.code }}
|
| 394 |
+
run: |
|
| 395 |
+
if [ $RESULT == "success" ]; then
|
| 396 |
+
exit 0
|
| 397 |
+
else
|
| 398 |
+
exit 1
|
| 399 |
+
fi
|
| 400 |
+
|
| 401 |
+
Coverage:
|
| 402 |
+
runs-on: ubuntu-latest
|
| 403 |
+
needs: [pre-flight, Nemo_CICD_Test]
|
| 404 |
+
if: |
|
| 405 |
+
needs.pre-flight.outputs.test_to_run != '[]'
|
| 406 |
+
&& needs.pre-flight.outputs.components_to_run != '[]'
|
| 407 |
+
&& (
|
| 408 |
+
success()
|
| 409 |
+
|| needs.Nemo_CICD_Test.result == 'success'
|
| 410 |
+
)
|
| 411 |
+
&& !cancelled()
|
| 412 |
+
strategy:
|
| 413 |
+
matrix:
|
| 414 |
+
flag: [unit-test, e2e]
|
| 415 |
+
steps:
|
| 416 |
+
- name: Checkout
|
| 417 |
+
uses: actions/checkout@v4
|
| 418 |
+
|
| 419 |
+
- name: Download coverage reports of current branch
|
| 420 |
+
uses: actions/download-artifact@v4
|
| 421 |
+
with:
|
| 422 |
+
pattern: coverage-${{ matrix.flag }}-*
|
| 423 |
+
|
| 424 |
+
- name: Get total coverage of current branch
|
| 425 |
+
shell: bash -x -e -u -o pipefail {0}
|
| 426 |
+
if: always()
|
| 427 |
+
run: |
|
| 428 |
+
pip install coverage
|
| 429 |
+
|
| 430 |
+
ls -al .
|
| 431 |
+
ls -al coverage-*/
|
| 432 |
+
coverage combine --keep $(ls coverage-*/.coverage)
|
| 433 |
+
coverage report -i
|
| 434 |
+
rm -rf coverage-*
|
| 435 |
+
ls -al
|
| 436 |
+
|
| 437 |
+
- name: Upload coverage reports to Codecov
|
| 438 |
+
uses: codecov/codecov-action@v5
|
| 439 |
+
with:
|
| 440 |
+
token: ${{ secrets.CODECOV_TOKEN }}
|
| 441 |
+
verbose: true
|
| 442 |
+
flags: ${{ matrix.flag }}
|
| 443 |
+
|
| 444 |
+
- name: Upload artifacts
|
| 445 |
+
uses: actions/upload-artifact@v4
|
| 446 |
+
with:
|
| 447 |
+
name: coverage-${{ matrix.flag }}-aggregated
|
| 448 |
+
path: |
|
| 449 |
+
.coverage
|
| 450 |
+
include-hidden-files: true
|
.github/workflows/cicd-relabel-bot.yml
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# If the PR gets updated by a new commit, it prevents auto-merges
|
| 2 |
+
# since there's no CI event attached to the commit anymore.
|
| 3 |
+
# This workflow re-attaches the label after a push, if the PR
|
| 4 |
+
# was already labeled prior to the push.
|
| 5 |
+
name: CICD Relabel bot
|
| 6 |
+
|
| 7 |
+
on:
|
| 8 |
+
pull_request_target:
|
| 9 |
+
|
| 10 |
+
jobs:
|
| 11 |
+
relabel:
|
| 12 |
+
runs-on: ubuntu-latest
|
| 13 |
+
env:
|
| 14 |
+
PR_NUMBER: ${{ github.event.number }}
|
| 15 |
+
GH_TOKEN: ${{ secrets.NEMO_RELABEL_TOKEN }}
|
| 16 |
+
HOSTNAME: ${{ github.server_url }}
|
| 17 |
+
permissions: write-all
|
| 18 |
+
steps:
|
| 19 |
+
- name: Checkout repo
|
| 20 |
+
uses: actions/checkout@v4
|
| 21 |
+
|
| 22 |
+
- name: Check if PR was already labeled with `Run CICD`
|
| 23 |
+
id: pre-flight
|
| 24 |
+
run: |
|
| 25 |
+
LABELS=$(gh pr view "$PR_NUMBER" --json labels)
|
| 26 |
+
HAS_LABEL=$(echo $LABELS \
|
| 27 |
+
| jq '[.labels[].name] | any(. == "Run CICD")'
|
| 28 |
+
)
|
| 29 |
+
|
| 30 |
+
echo "has-label=$HAS_LABEL" | tee -a "$GITHUB_OUTPUT"
|
| 31 |
+
|
| 32 |
+
- name: Relabel PR
|
| 33 |
+
if: ${{ steps.pre-flight.outputs.has-label == 'true' }}
|
| 34 |
+
run: |
|
| 35 |
+
gh pr edit "$PR_NUMBER" --remove-label "Run CICD"
|
| 36 |
+
gh pr edit "$PR_NUMBER" --add-label "Run CICD"
|
.github/workflows/close-inactive-issue-pr.yml
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: Stale-Close-Inactive-Issues-PRs
|
| 2 |
+
on:
|
| 3 |
+
schedule:
|
| 4 |
+
- cron: "30 1 * * *"
|
| 5 |
+
|
| 6 |
+
jobs:
|
| 7 |
+
close-issues:
|
| 8 |
+
runs-on: ubuntu-latest
|
| 9 |
+
permissions:
|
| 10 |
+
issues: write
|
| 11 |
+
pull-requests: write
|
| 12 |
+
steps:
|
| 13 |
+
- uses: actions/stale@v6
|
| 14 |
+
with:
|
| 15 |
+
operations-per-run: 100
|
| 16 |
+
days-before-issue-stale: 30
|
| 17 |
+
days-before-issue-close: 7
|
| 18 |
+
stale-issue-label: "stale"
|
| 19 |
+
stale-issue-message: "This issue is stale because it has been open for 30 days with no activity. Remove stale label or comment or this will be closed in 7 days."
|
| 20 |
+
close-issue-message: "This issue was closed because it has been inactive for 7 days since being marked as stale."
|
| 21 |
+
days-before-pr-stale: 14
|
| 22 |
+
days-before-pr-close: 7
|
| 23 |
+
stale-pr-message: "This PR is stale because it has been open for 14 days with no activity. Remove stale label or comment or update or this will be closed in 7 days."
|
| 24 |
+
close-pr-message: "This PR was closed because it has been inactive for 7 days since being marked as stale."
|
| 25 |
+
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
.github/workflows/code-formatting.yml
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: Isort and Black Formatting
|
| 2 |
+
# Incrementally reformat only changed files with black, all files with isort
|
| 3 |
+
#
|
| 4 |
+
# Replaces pre-commit.ci, since it reformats all the files.
|
| 5 |
+
# See issue https://github.com/pre-commit-ci/issues/issues/90
|
| 6 |
+
#
|
| 7 |
+
# The action requires a custom token to trigger workflow after pushing reformatted files back to the branch.
|
| 8 |
+
# `secrets.GITHUB_TOKEN` can be used instead, but this will result
|
| 9 |
+
# in not running necessary checks after reformatting, which is undesirable.
|
| 10 |
+
# For details see https://github.com/orgs/community/discussions/25702
|
| 11 |
+
|
| 12 |
+
on:
|
| 13 |
+
pull_request_target:
|
| 14 |
+
paths:
|
| 15 |
+
- "**.py"
|
| 16 |
+
types: [opened, synchronize, reopened, labeled, unlabeled]
|
| 17 |
+
|
| 18 |
+
defaults:
|
| 19 |
+
run:
|
| 20 |
+
shell: bash -x -e -u -o pipefail {0}
|
| 21 |
+
|
| 22 |
+
jobs:
|
| 23 |
+
reformat_with_isort_and_black:
|
| 24 |
+
runs-on: ubuntu-latest
|
| 25 |
+
permissions:
|
| 26 |
+
# write permissions required to commit changes
|
| 27 |
+
contents: write
|
| 28 |
+
steps:
|
| 29 |
+
- name: Checkout branch
|
| 30 |
+
uses: actions/checkout@v4
|
| 31 |
+
with:
|
| 32 |
+
# setup repository and ref for PRs, see
|
| 33 |
+
# https://github.com/EndBug/add-and-commit?tab=readme-ov-file#working-with-prs
|
| 34 |
+
repository: ${{ github.event.pull_request.head.repo.full_name }}
|
| 35 |
+
ref: ${{ github.event.pull_request.head.ref }}
|
| 36 |
+
# custom token is required to trigger actions after reformatting + pushing
|
| 37 |
+
token: ${{ secrets.NEMO_REFORMAT_TOKEN }}
|
| 38 |
+
fetch-depth: 0
|
| 39 |
+
|
| 40 |
+
- name: Get changed files
|
| 41 |
+
id: changed-files
|
| 42 |
+
uses: step-security/[email protected]
|
| 43 |
+
with:
|
| 44 |
+
files: |
|
| 45 |
+
**.py
|
| 46 |
+
|
| 47 |
+
- name: Setup Python env
|
| 48 |
+
uses: actions/setup-python@v5
|
| 49 |
+
with:
|
| 50 |
+
python-version: "3.10"
|
| 51 |
+
|
| 52 |
+
- name: black
|
| 53 |
+
uses: psf/black@stable
|
| 54 |
+
if: ${{ steps.changed-files.outputs.any_changed == 'true' }}
|
| 55 |
+
with:
|
| 56 |
+
options: "--verbose"
|
| 57 |
+
# apply only to changed files (pass explicitly the files)
|
| 58 |
+
src: "${{ steps.changed-files.outputs.all_changed_files }}"
|
| 59 |
+
version: "~= 24.3"
|
| 60 |
+
|
| 61 |
+
- name: isort
|
| 62 |
+
uses: isort/isort-action@v1
|
| 63 |
+
if: ${{ steps.changed-files.outputs.any_changed == 'true' }}
|
| 64 |
+
with:
|
| 65 |
+
isort-version: "5.13.2"
|
| 66 |
+
# reformat all files with isort – safe since the whole repo is already reformatted
|
| 67 |
+
configuration: ""
|
| 68 |
+
|
| 69 |
+
- uses: EndBug/add-and-commit@v9
|
| 70 |
+
# Commit changes. Nothing is committed if no changes.
|
| 71 |
+
with:
|
| 72 |
+
message: Apply isort and black reformatting
|
| 73 |
+
commit: --signoff
|
.github/workflows/code-init-file-checker.yml
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: Check __init__ files
|
| 2 |
+
|
| 3 |
+
on:
|
| 4 |
+
pull_request:
|
| 5 |
+
types: [opened, synchronize, reopened]
|
| 6 |
+
|
| 7 |
+
jobs:
|
| 8 |
+
check-init-files:
|
| 9 |
+
runs-on: ubuntu-latest
|
| 10 |
+
steps:
|
| 11 |
+
- name: Checkout
|
| 12 |
+
uses: actions/checkout@v4
|
| 13 |
+
|
| 14 |
+
- name: Set up Python
|
| 15 |
+
uses: actions/setup-python@v4
|
| 16 |
+
with:
|
| 17 |
+
python-version: "3.11"
|
| 18 |
+
|
| 19 |
+
- name: Install init-file-checker
|
| 20 |
+
run: pip install init-file-checker
|
| 21 |
+
|
| 22 |
+
- name: Run init-file-checker
|
| 23 |
+
run: init-file-checker nemo/
|
.github/workflows/code-linting.yml
ADDED
|
@@ -0,0 +1,160 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: PyLint and flake8 linting
|
| 2 |
+
|
| 3 |
+
on:
|
| 4 |
+
pull_request:
|
| 5 |
+
types: [opened, synchronize, reopened, labeled, unlabeled]
|
| 6 |
+
workflow_call:
|
| 7 |
+
|
| 8 |
+
jobs:
|
| 9 |
+
linting:
|
| 10 |
+
name: "Domain: ${{ matrix.domain }}"
|
| 11 |
+
runs-on: ubuntu-latest
|
| 12 |
+
strategy:
|
| 13 |
+
fail-fast: false
|
| 14 |
+
matrix:
|
| 15 |
+
domain: [speech, other]
|
| 16 |
+
env:
|
| 17 |
+
DOMAIN: ${{ matrix.domain }}
|
| 18 |
+
steps:
|
| 19 |
+
- name: Checkout
|
| 20 |
+
uses: actions/checkout@v4
|
| 21 |
+
|
| 22 |
+
- name: Select filter
|
| 23 |
+
id: filter
|
| 24 |
+
run: |
|
| 25 |
+
# Build the comma-separated glob list that selects which changed files this
# matrix domain lints, and publish it as the step output `main`.

# Globs owned by the speech domain.
SPEECH_GLOBS=(
  "nemo/collections/common/data/lhotse/*.py"
  "nemo/collections/asr/**/*.py"
  "nemo/collections/tts/**/*.py"
  "nemo/collections/audio/**/*.py"
  "nemo/collections/multimodal/speech_llm/**/*.py"
  "nemo/collections/speechlm/**/*.py"
  "nemo/collections/speechlm2/**/*.py"
)

if [[ "$DOMAIN" == "speech" ]]; then
  PATTERNS=("${SPEECH_GLOBS[@]}")
else
  # Every other domain: all of nemo/, minus each speech glob, minus nemo/export.
  PATTERNS=("nemo/**/*.py")
  for glob in "${SPEECH_GLOBS[@]}"; do
    PATTERNS+=("!${glob}")
  done
  PATTERNS+=("!nemo/export/**/*.py")
fi

# Join with "," (the first character of IFS is used as the separator for [*]).
FILTER=$(IFS=,; printf '%s' "${PATTERNS[*]}")

echo "main=$FILTER" | tee -a "$GITHUB_OUTPUT"
|
| 51 |
+
|
| 52 |
+
- name: Get changed files
|
| 53 |
+
id: changed-files
|
| 54 |
+
uses: step-security/[email protected]
|
| 55 |
+
with:
|
| 56 |
+
files: ${{ steps.filter.outputs.main }}
|
| 57 |
+
files_separator: ","
|
| 58 |
+
separator: " "
|
| 59 |
+
|
| 60 |
+
- name: Run PyLint
|
| 61 |
+
id: pylint
|
| 62 |
+
env:
|
| 63 |
+
CHANGED_FILES: ${{ steps.changed-files.outputs.all_changed_files }}
|
| 64 |
+
SKIP_DOCS: ${{ contains(github.event.pull_request.labels.*.name, 'skip-docs') }}
|
| 65 |
+
SKIP_LINTING: ${{ contains(github.event.pull_request.labels.*.name, 'skip-linting') }}
|
| 66 |
+
run: |
|
| 67 |
+
# Run PyLint on the changed files of this domain and publish its exit status
# as the step output `exit-code`. The report is written to
# pylintrc.$DOMAIN.txt for the later summary step.
#
# Inputs (environment):
#   CHANGED_FILES  space-separated list of changed files (may be empty)
#   SKIP_DOCS      "true" when the PR carries the `skip-docs` label
#   SKIP_LINTING   "true" when the PR carries the `skip-linting` label
#   DOMAIN         linting domain; selects the rcfile and the report name
if [[ -z "$CHANGED_FILES" ]]; then
  echo Nothing to lint.
  echo "exit-code=0" | tee -a "$GITHUB_OUTPUT"
  exit 0
fi

# Collect optional arguments in an array so that several labels combine
# instead of the last one silently replacing the earlier ones.
ADDITIONAL_PYLINT_ARGS=()

if [[ "$SKIP_DOCS" == true ]]; then
  # C0115 / C0116 are pylint's missing class / function docstring checks.
  ADDITIONAL_PYLINT_ARGS+=("--disable=C0115,C0116")
fi

if [[ "$SKIP_LINTING" == true ]]; then
  # Still produce the report, but never fail the step.
  ADDITIONAL_PYLINT_ARGS+=("--exit-zero")
fi

# Split the space-separated list into a real array; expanding it quoted keeps
# the shell from glob-expanding file names.
read -r -a FILES <<< "$CHANGED_FILES"

pip install pylint
# From here on a non-zero status must not abort the script: it is reported
# through the step output instead.
set +e
pylint "${ADDITIONAL_PYLINT_ARGS[@]}" --output "pylintrc.$DOMAIN.txt" --rcfile ".pylintrc.$DOMAIN" "${FILES[@]}"
echo "exit-code=$?" | tee -a "$GITHUB_OUTPUT"
|
| 87 |
+
|
| 88 |
+
- name: Run flake8
|
| 89 |
+
id: flake8
|
| 90 |
+
env:
|
| 91 |
+
CHANGED_FILES: ${{ steps.changed-files.outputs.all_changed_files }}
|
| 92 |
+
SKIP_LINTING: ${{ contains(github.event.pull_request.labels.*.name, 'skip-linting') }}
|
| 93 |
+
run: |
|
| 94 |
+
# Run flake8 on the changed files of this domain and publish its exit status
# as the step output `exit-code`. The report is written to
# flake8.$DOMAIN.txt for the later summary step.
#
# Inputs (environment):
#   CHANGED_FILES  space-separated list of changed files (may be empty)
#   SKIP_LINTING   "true" when the PR carries the `skip-linting` label
#   DOMAIN         linting domain; selects the config file and the report name
if [[ -z "$CHANGED_FILES" ]]; then
  echo Nothing to lint.
  echo "exit-code=0" | tee -a "$GITHUB_OUTPUT"
  exit 0
fi

ADDITIONAL_FLAKE8_ARGS=()
if [[ "$SKIP_LINTING" == true ]]; then
  # Still produce the report, but never fail the step.
  ADDITIONAL_FLAKE8_ARGS+=("--exit-zero")
fi

# Split the space-separated list into a real array; expanding it quoted keeps
# the shell from glob-expanding file names.
read -r -a FILES <<< "$CHANGED_FILES"

pip install flake8
# From here on a non-zero status must not abort the script: it is reported
# through the step output instead.
set +e
flake8 "${ADDITIONAL_FLAKE8_ARGS[@]}" --output "flake8.$DOMAIN.txt" --config ".flake8.$DOMAIN" "${FILES[@]}"
echo "exit-code=$?" | tee -a "$GITHUB_OUTPUT"
|
| 110 |
+
|
| 111 |
+
- name: Summary
|
| 112 |
+
env:
|
| 113 |
+
PYLINT: ${{ steps.pylint.outputs.exit-code == 0 }}
|
| 114 |
+
FLAKE8: ${{ steps.flake8.outputs.exit-code == 0 }}
|
| 115 |
+
run: |
|
| 116 |
+
|
| 117 |
+
# Publish the linter reports to the job summary (and the log) and fail the
# step when either linter reported problems.
#
# Inputs (environment):
#   PYLINT / FLAKE8  "true" when the corresponding linter exited with status 0
#   DOMAIN           linting domain; selects which report files are read

# Append a titled, fenced copy of a report file to the job summary.
#   $1  heading line
#   $2  path of the report file
append_report() {
  local title="$1" report="$2"
  echo "$title" | tee -a "$GITHUB_STEP_SUMMARY"
  echo '```' | tee -a "$GITHUB_STEP_SUMMARY"
  tee -a "$GITHUB_STEP_SUMMARY" < "$report"
  echo '```' | tee -a "$GITHUB_STEP_SUMMARY"
}

if [[ "$PYLINT" != "true" ]]; then
  append_report "Pylint output:" "pylintrc.$DOMAIN.txt"
fi

if [[ "$FLAKE8" != "true" ]]; then
  append_report "Flake8 output:" "flake8.$DOMAIN.txt"
fi

if [[ "$PYLINT" != "true" || "$FLAKE8" != "true" ]]; then
  echo "The following directories got scanned:" | tee -a "$GITHUB_STEP_SUMMARY"

  echo '```' | tee -a "$GITHUB_STEP_SUMMARY"
  # The filter is a list of glob patterns; quoting prints it verbatim instead
  # of letting the shell attempt pathname expansion or word splitting on it.
  echo "${{ steps.filter.outputs.main }}" | tee -a "$GITHUB_STEP_SUMMARY"
  echo '```' | tee -a "$GITHUB_STEP_SUMMARY"

  exit 1
fi
|
| 142 |
+
|
| 143 |
+
Nemo_Linting_Test:
|
| 144 |
+
needs: linting
|
| 145 |
+
runs-on: ubuntu-latest
|
| 146 |
+
if: always()
|
| 147 |
+
steps:
|
| 148 |
+
- name: Main
|
| 149 |
+
env:
|
| 150 |
+
RESULTS: ${{ toJson(needs.linting) }}
|
| 151 |
+
run: |
|
| 152 |
+
# Gate job: succeed only when the `linting` matrix job finished with
# result "success". RESULTS holds the JSON form of `needs.linting`.
RESULT=$(jq -r '.result' <<< "$RESULTS")

if [[ "$RESULT" != "success" ]]; then
  echo "Some linting domains failed."
  exit 1
fi

echo "All passed."
exit 0
|
.github/workflows/codeql.yml
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# For most projects, this workflow file will not need changing; you simply need
|
| 2 |
+
# to commit it to your repository.
|
| 3 |
+
#
|
| 4 |
+
# You may wish to alter this file to override the set of languages analyzed,
|
| 5 |
+
# or to provide custom queries or build logic.
|
| 6 |
+
#
|
| 7 |
+
# ******** NOTE ********
|
| 8 |
+
# We have attempted to detect the languages in your repository. Please check
|
| 9 |
+
# the `language` matrix defined below to confirm you have the correct set of
|
| 10 |
+
# supported CodeQL languages.
|
| 11 |
+
#
|
| 12 |
+
name: "CodeQL"
|
| 13 |
+
|
| 14 |
+
on:
|
| 15 |
+
push:
|
| 16 |
+
branches: [ "main", "[rv][0-9]*", "gh-pages-src" ]
|
| 17 |
+
pull_request:
|
| 18 |
+
# The branches below must be a subset of the branches above
|
| 19 |
+
branches: [ "main" ]
|
| 20 |
+
schedule:
|
| 21 |
+
- cron: '19 1 * * 4'
|
| 22 |
+
|
| 23 |
+
jobs:
|
| 24 |
+
analyze:
|
| 25 |
+
name: Analyze
|
| 26 |
+
runs-on: ubuntu-latest
|
| 27 |
+
permissions:
|
| 28 |
+
actions: read
|
| 29 |
+
contents: read
|
| 30 |
+
security-events: write
|
| 31 |
+
|
| 32 |
+
strategy:
|
| 33 |
+
fail-fast: false
|
| 34 |
+
matrix:
|
| 35 |
+
language: [ 'python' ]
|
| 36 |
+
# CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ]
|
| 37 |
+
# Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support
|
| 38 |
+
|
| 39 |
+
steps:
|
| 40 |
+
- name: Checkout repository
|
| 41 |
+
uses: actions/checkout@v3
|
| 42 |
+
|
| 43 |
+
# Initializes the CodeQL tools for scanning.
|
| 44 |
+
- name: Initialize CodeQL
|
| 45 |
+
uses: github/codeql-action/init@v2
|
| 46 |
+
with:
|
| 47 |
+
languages: ${{ matrix.language }}
|
| 48 |
+
# If you wish to specify custom queries, you can do so here or in a config file.
|
| 49 |
+
# By default, queries listed here will override any specified in a config file.
|
| 50 |
+
# Prefix the list here with "+" to use these queries and those in the config file.
|
| 51 |
+
|
| 52 |
+
# For details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
|
| 53 |
+
queries: security-and-quality # security-extended,
|
| 54 |
+
config-file: ./.github/workflows/config/codeql.yml
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
# Autobuild attempts to build any compiled languages (C/C++, C#, Go, or Java).
|
| 58 |
+
# If this step fails, then you should remove it and run the build manually (see below)
|
| 59 |
+
- name: Autobuild
|
| 60 |
+
uses: github/codeql-action/autobuild@v2
|
| 61 |
+
|
| 62 |
+
# ℹ️ Command-line programs to run using the OS shell.
|
| 63 |
+
# 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun
|
| 64 |
+
|
| 65 |
+
# If the Autobuild fails above, remove it and uncomment the following three lines.
|
| 66 |
+
# Modify them (or add more) to build your code; if your project needs it, please refer to the EXAMPLE below for guidance.
|
| 67 |
+
|
| 68 |
+
# - run: |
|
| 69 |
+
# echo "Run, Build Application using script"
|
| 70 |
+
# ./location_of_script_within_repo/buildscript.sh
|
| 71 |
+
|
| 72 |
+
- name: Perform CodeQL Analysis
|
| 73 |
+
uses: github/codeql-action/analyze@v2
|
| 74 |
+
with:
|
| 75 |
+
category: "/language:${{matrix.language}}"
|
.github/workflows/community-bot.yml
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: Community Bot
|
| 2 |
+
|
| 3 |
+
on:
|
| 4 |
+
issues:
|
| 5 |
+
types: [opened, edited, reopened, closed, deleted]
|
| 6 |
+
issue_comment:
|
| 7 |
+
types: [created, edited, deleted]
|
| 8 |
+
|
| 9 |
+
jobs:
|
| 10 |
+
community-bot:
|
| 11 |
+
uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/[email protected]
|
| 12 |
+
with:
|
| 13 |
+
community_project_id: ${{ vars.COMMUNITY_PROJECT_ID }}
|
| 14 |
+
secrets:
|
| 15 |
+
GH_TOKEN: ${{ secrets.PAT }}
|
.github/workflows/config/changelog-config.json
ADDED
|
@@ -0,0 +1,134 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"categories": [
|
| 3 |
+
{
|
| 4 |
+
"title": "## ASR\n\n<details><summary>Changelog</summary>",
|
| 5 |
+
"labels": ["asr"],
|
| 6 |
+
"exclude_labels": ["cherry-pick"]
|
| 7 |
+
},
|
| 8 |
+
{
|
| 9 |
+
"title": "</details>\n\n## TTS\n\n<details><summary>Changelog</summary>",
|
| 10 |
+
"labels": ["tts"],
|
| 11 |
+
"exclude_labels": ["cherry-pick"]
|
| 12 |
+
},
|
| 13 |
+
{
|
| 14 |
+
"title": "</details>\n\n## NLP / NMT\n\n<details><summary>Changelog</summary>",
|
| 15 |
+
"labels": ["nlp", "nmt", "megatron"],
|
| 16 |
+
"exclude_labels": ["cherry-pick"]
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"title": "</details>\n\n## Text Normalization / Inverse Text Normalization\n\n<details><summary>Changelog</summary>",
|
| 20 |
+
"labels": ["tn", "itn"],
|
| 21 |
+
"exclude_labels": ["cherry-pick"]
|
| 22 |
+
},
|
| 23 |
+
{
|
| 24 |
+
"title": "</details>\n\n## NeMo Tools\n\n<details><summary>Changelog</summary>",
|
| 25 |
+
"labels": ["tools"],
|
| 26 |
+
"exclude_labels": ["cherry-pick"]
|
| 27 |
+
},
|
| 28 |
+
{
|
| 29 |
+
"title": "</details>\n\n## Export\n\n<details><summary>Changelog</summary>",
|
| 30 |
+
"labels": ["export"],
|
| 31 |
+
"exclude_labels": ["cherry-pick"]
|
| 32 |
+
},
|
| 33 |
+
{
|
| 34 |
+
"title": "</details>\n\n## Documentation\n\n<details><summary>Changelog</summary>",
|
| 35 |
+
"labels": ["docs"],
|
| 36 |
+
"exclude_labels": ["cherry-pick"]
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"title": "</details>\n\n## Bugfixes\n\n<details><summary>Changelog</summary>",
|
| 40 |
+
"labels": ["bug"],
|
| 41 |
+
"exclude_labels": ["cherry-pick"]
|
| 42 |
+
},
|
| 43 |
+
{
|
| 44 |
+
"title": "</details>\n\n## Cherrypick\n\n<details><summary>Changelog</summary>",
|
| 45 |
+
"labels": ["cherry-pick"],
|
| 46 |
+
"exclude_labels": ["cherry-pick"]
|
| 47 |
+
}
|
| 48 |
+
],
|
| 49 |
+
"ignore_labels": [
|
| 50 |
+
"ignore"
|
| 51 |
+
],
|
| 52 |
+
"sort": "ASC",
|
| 53 |
+
"template": "\n${{CHANGELOG}}</details>\n\n## Uncategorized:\n\n<details><summary>Changelog</summary>\n\n${{UNCATEGORIZED}}\n</details>\n",
|
| 54 |
+
"pr_template": "- ${{TITLE}} by @${{AUTHOR}} :: PR: #${{NUMBER}}",
|
| 55 |
+
"empty_template": "${{OWNER}}\n${{REPO}}\n${{FROM_TAG}}\n${{TO_TAG}}",
|
| 56 |
+
"label_extractor": [
|
| 57 |
+
{
|
| 58 |
+
"pattern": "(.*tts.*)|(.*g2p.*)",
|
| 59 |
+
"target": "tts",
|
| 60 |
+
"flags": "gimu",
|
| 61 |
+
"on_property": ["title", "body"]
|
| 62 |
+
},
|
| 63 |
+
{
|
| 64 |
+
"pattern": "(.*asr.*)|(.*ctc.*)|(.*rnnt.*)|(.*transducer.*)|(.*dali.*)|(.*k2.*)",
|
| 65 |
+
"target": "asr",
|
| 66 |
+
"flags": "gimu",
|
| 67 |
+
"on_property": ["title", "body"]
|
| 68 |
+
},
|
| 69 |
+
{
|
| 70 |
+
"pattern": "(.*nlp.*)|(.*punctuation.*)|(.*capitalization.*)|(.*entity.*)|(.*glue.*)|(.*entity.*)|(.*retrieval.*)|(.*entity.*)|(.*intent.*)|(.*slot.*)|(.*entity.*)|(.*language.*)|(.*qa.*)|(.*token class.*)|(.*text class.*)",
|
| 71 |
+
"target": "nlp",
|
| 72 |
+
"flags": "gimu",
|
| 73 |
+
"on_property": ["title", "body"]
|
| 74 |
+
},
|
| 75 |
+
{
|
| 76 |
+
"pattern": "(.*nmt.*)|(.*bignlp.*)|(.*megatron.*)|(.*machine.*)|(.*translation.*)|(.*gpt.*)",
|
| 77 |
+
"target": "nmt",
|
| 78 |
+
"flags": "gimu",
|
| 79 |
+
"on_property": ["title", "body"]
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"pattern": "(.*tn.*)|(.*itn.*)|(.*text norm.*)",
|
| 83 |
+
"target": "tn",
|
| 84 |
+
"flags": "gimu",
|
| 85 |
+
"on_property": ["title", "body"]
|
| 86 |
+
},
|
| 87 |
+
{
|
| 88 |
+
"pattern": "(.*sde.*)|(.*ctc segment.*)",
|
| 89 |
+
"target": "tools",
|
| 90 |
+
"flags": "gimu",
|
| 91 |
+
"on_property": ["title", "body"]
|
| 92 |
+
},
|
| 93 |
+
{
|
| 94 |
+
"pattern": "(.*trt.*)|(.*onnx.*)|(.*export.*)",
|
| 95 |
+
"target": "export",
|
| 96 |
+
"flags": "gimu",
|
| 97 |
+
"on_property": ["title", "body"]
|
| 98 |
+
},
|
| 99 |
+
{
|
| 100 |
+
"pattern": "(.*\\[x\\] Documentation.*)",
|
| 101 |
+
"target": "docs",
|
| 102 |
+
"flags": "gmu",
|
| 103 |
+
"on_property": ["title", "body"]
|
| 104 |
+
},
|
| 105 |
+
{
|
| 106 |
+
"pattern": "(.*\\[x\\] Bugfix.*)|(.*patch.*)",
|
| 107 |
+
"target": "bug",
|
| 108 |
+
"flags": "gmu",
|
| 109 |
+
"on_property": ["title", "body"]
|
| 110 |
+
},
|
| 111 |
+
{
|
| 112 |
+
"pattern": "(.*cherry-pick.*)|(.*cherrypick.*)",
|
| 113 |
+
"target": "cherrypick",
|
| 114 |
+
"flags": "gimu",
|
| 115 |
+
"on_property": ["title", "body"]
|
| 116 |
+
}
|
| 117 |
+
],
|
| 118 |
+
"duplicate_filter": {
|
| 119 |
+
"pattern": ".+",
|
| 120 |
+
"on_property": "title",
|
| 121 |
+
"method": "match"
|
| 122 |
+
},
|
| 123 |
+
"transformers": [
|
| 124 |
+
],
|
| 125 |
+
"max_tags_to_fetch": 100,
|
| 126 |
+
"max_pull_requests": 500,
|
| 127 |
+
"max_back_track_time_days": 365,
|
| 128 |
+
"exclude_merge_branches": [
|
| 129 |
+
],
|
| 130 |
+
"tag_resolver": {
|
| 131 |
+
"method": "semver"
|
| 132 |
+
}
|
| 133 |
+
}
|
| 134 |
+
|
.github/workflows/config/codeql.yml
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: "CodeQL config"
|
| 2 |
+
|
| 3 |
+
paths:
|
| 4 |
+
- nemo/
|
| 5 |
+
- tests/
|
| 6 |
+
- tools/
|
| 7 |
+
- scripts/
|
| 8 |
+
- examples/
|
| 9 |
+
- .github/
|
.github/workflows/copyright-check.yml
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2020-2021, NVIDIA CORPORATION.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
|
| 15 |
+
name: Copyright check
|
| 16 |
+
|
| 17 |
+
on:
|
| 18 |
+
pull_request:
|
| 19 |
+
|
| 20 |
+
jobs:
|
| 21 |
+
copyright-check:
|
| 22 |
+
uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/[email protected]
|
.github/workflows/gh-docs.yml
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: gh-docs-build
|
| 2 |
+
on:
|
| 3 |
+
push:
|
| 4 |
+
pull_request:
|
| 5 |
+
paths:
|
| 6 |
+
- "**"
|
| 7 |
+
|
| 8 |
+
# Grant write access to all permission scopes
|
| 9 |
+
permissions: write-all
|
| 10 |
+
|
| 11 |
+
env:
|
| 12 |
+
PYTHON_VERSION: "3.11"
|
| 13 |
+
|
| 14 |
+
jobs:
|
| 15 |
+
deploy:
|
| 16 |
+
runs-on: ubuntu-latest
|
| 17 |
+
|
| 18 |
+
container:
|
| 19 |
+
image: squidfunk/mkdocs-material
|
| 20 |
+
|
| 21 |
+
steps:
|
| 22 |
+
- uses: actions/checkout@v4
|
| 23 |
+
if: github.event.repository.fork == false
|
| 24 |
+
with:
|
| 25 |
+
ref: gh-pages-src
|
| 26 |
+
|
| 27 |
+
- name: "Correct github config"
|
| 28 |
+
if: github.event.repository.fork == false
|
| 29 |
+
run: |
|
| 30 |
+
# Prepare git inside the container: trust the workspace, set the committer
# identity from the triggering actor, and re-point `origin` at a
# token-authenticated HTTPS URL.
# NOTE(review): GITHUB_TOKEN and GITHUB_DOMAIN are read from the environment
# but are not set in this script — confirm the step or job provides them.
git config --global --add safe.directory "$GITHUB_WORKSPACE"
git config --global user.name "${GITHUB_ACTOR}"
git config --global user.email "${GITHUB_ACTOR}@users.noreply.${GITHUB_DOMAIN:-"github.com"}"
remote_repo="https://x-access-token:${GITHUB_TOKEN}@${GITHUB_DOMAIN:-"github.com"}/${GITHUB_REPOSITORY}.git"
# Log the remote for debugging, but never write the access token to the log.
echo "https://x-access-token:***@${GITHUB_DOMAIN:-"github.com"}/${GITHUB_REPOSITORY}.git"
git remote rm origin
git remote add origin "${remote_repo}"
|
| 37 |
+
|
| 38 |
+
- name: "Deploy Github Page"
|
| 39 |
+
continue-on-error: true
|
| 40 |
+
run: mkdocs gh-deploy --force
|
| 41 |
+
|
| 42 |
+
linkcheck:
|
| 43 |
+
runs-on: ubuntu-latest
|
| 44 |
+
steps:
|
| 45 |
+
- name: Checkout
|
| 46 |
+
uses: actions/checkout@v4
|
| 47 |
+
|
| 48 |
+
- name: Get changed files
|
| 49 |
+
id: changed-files
|
| 50 |
+
uses: step-security/[email protected]
|
| 51 |
+
with:
|
| 52 |
+
files: docs/**
|
| 53 |
+
files_separator: ","
|
| 54 |
+
separator: " "
|
| 55 |
+
|
| 56 |
+
- name: Set up Python ${{ env.PYTHON_VERSION }}
|
| 57 |
+
if: steps.changed-files.outputs.any_changed == 'true'
|
| 58 |
+
uses: actions/setup-python@v5
|
| 59 |
+
with:
|
| 60 |
+
python-version: ${{ env.PYTHON_VERSION }}
|
| 61 |
+
|
| 62 |
+
- name: Install Sphinx dependencies
|
| 63 |
+
if: steps.changed-files.outputs.any_changed == 'true'
|
| 64 |
+
run: python3 -m pip install -r requirements/requirements_docs.txt
|
| 65 |
+
|
| 66 |
+
- name: Linkcheck docs build
|
| 67 |
+
if: steps.changed-files.outputs.any_changed == 'true'
|
| 68 |
+
run: make -C docs linkcheck || true
|
| 69 |
+
|
| 70 |
+
- name: Eliminate false positives
|
| 71 |
+
if: steps.changed-files.outputs.any_changed == 'true'
|
| 72 |
+
run: ./docs/check_for_broken_links.sh
|
| 73 |
+
|
| 74 |
+
- name: Upload linkcheck output
|
| 75 |
+
if: steps.changed-files.outputs.any_changed == 'true'
|
| 76 |
+
uses: actions/upload-artifact@v4
|
| 77 |
+
with:
|
| 78 |
+
name: linkcheck-artifact
|
| 79 |
+
path: docs/build/linkcheck
|
| 80 |
+
if-no-files-found: error
|
| 81 |
+
retention-days: 7
|
.github/workflows/install-test.yml
ADDED
|
@@ -0,0 +1,286 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: CI-Install-Check
|
| 2 |
+
|
| 3 |
+
on:
|
| 4 |
+
pull_request:
|
| 5 |
+
paths:
|
| 6 |
+
- "**"
|
| 7 |
+
|
| 8 |
+
concurrency:
|
| 9 |
+
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
| 10 |
+
cancel-in-progress: true
|
| 11 |
+
|
| 12 |
+
jobs:
|
| 13 |
+
test-installs-macos:
|
| 14 |
+
name: ${{ matrix.os }}-py${{ matrix.python }}-${{ matrix.installer }}
|
| 15 |
+
runs-on: ${{ matrix.os }}
|
| 16 |
+
strategy:
|
| 17 |
+
fail-fast: false
|
| 18 |
+
matrix:
|
| 19 |
+
os: [macos-latest]
|
| 20 |
+
python: ["3.10", "3.11", "3.12"]
|
| 21 |
+
installer: ["pip-install", "nemo-install"]
|
| 22 |
+
steps:
|
| 23 |
+
- name: Checkout repo
|
| 24 |
+
uses: actions/checkout@v2
|
| 25 |
+
|
| 26 |
+
- name: Check disk space before cleanup
|
| 27 |
+
run: df -h
|
| 28 |
+
|
| 29 |
+
- name: Free up disk space
|
| 30 |
+
run: |
|
| 31 |
+
# Remove unnecessary files on macOS
|
| 32 |
+
sudo rm -rf /usr/local/lib/android || true
|
| 33 |
+
sudo rm -rf /usr/local/.ghcup || true
|
| 34 |
+
sudo rm -rf /usr/local/lib/node_modules || true
|
| 35 |
+
brew cleanup || true
|
| 36 |
+
# Clear pip cache
|
| 37 |
+
pip cache purge || true
|
| 38 |
+
|
| 39 |
+
- name: Check disk space after cleanup
|
| 40 |
+
run: df -h
|
| 41 |
+
|
| 42 |
+
- uses: actions/setup-python@v5
|
| 43 |
+
with:
|
| 44 |
+
python-version: "${{ matrix.python }}"
|
| 45 |
+
|
| 46 |
+
- name: Install NeMo
|
| 47 |
+
env:
|
| 48 |
+
INSTALLER: ${{ matrix.installer }}
|
| 49 |
+
NEMO_TAG: ${{ github.sha }}
|
| 50 |
+
NEMO_REPO: ${{ github.server_url }}/${{ github.repository }}
|
| 51 |
+
run: |
|
| 52 |
+
if [[ "$INSTALLER" == "pip-install" ]]; then
|
| 53 |
+
pip install --no-cache-dir -U pip
|
| 54 |
+
pip install --no-cache-dir ".[all]"
|
| 55 |
+
else
|
| 56 |
+
export NEMO_TAG
|
| 57 |
+
export NEMO_REPO
|
| 58 |
+
export INSTALL_DIR=$(pwd)
|
| 59 |
+
|
| 60 |
+
bash docker/common/install_dep.sh --library "te,mcore,extra" --mode install
|
| 61 |
+
pip install --no-cache-dir ".[all]"
|
| 62 |
+
fi
|
| 63 |
+
|
| 64 |
+
- name: Check disk space after installation
|
| 65 |
+
run: df -h
|
| 66 |
+
|
| 67 |
+
- name: Run import checks
|
| 68 |
+
run: |
|
| 69 |
+
# Run import checks
|
| 70 |
+
for collection in "asr" "tts" "lightning" "core"; do
|
| 71 |
+
python tests/core_ptl/check_imports.py --domain "$collection"
|
| 72 |
+
done
|
| 73 |
+
|
| 74 |
+
test-installs-linux-amd:
|
| 75 |
+
name: ubuntu-22.04-amd-py${{ matrix.python }}-${{ matrix.installer }}
|
| 76 |
+
runs-on: ubuntu-22.04
|
| 77 |
+
strategy:
|
| 78 |
+
fail-fast: false
|
| 79 |
+
matrix:
|
| 80 |
+
python: ["3.10", "3.11", "3.12"]
|
| 81 |
+
installer: ["pip-install", "nemo-install"]
|
| 82 |
+
steps:
|
| 83 |
+
- name: Checkout repo
|
| 84 |
+
uses: actions/checkout@v2
|
| 85 |
+
|
| 86 |
+
- name: Check disk space before cleanup
|
| 87 |
+
run: df -h
|
| 88 |
+
|
| 89 |
+
- name: Free up disk space
|
| 90 |
+
run: |
|
| 91 |
+
# Remove unnecessary packages and files on Ubuntu
|
| 92 |
+
sudo apt-get clean
|
| 93 |
+
sudo rm -rf /usr/local/lib/android || true
|
| 94 |
+
sudo rm -rf /opt/ghc || true
|
| 95 |
+
sudo rm -rf /usr/local/.ghcup || true
|
| 96 |
+
sudo rm -rf /usr/share/dotnet || true
|
| 97 |
+
sudo rm -rf /opt/az || true
|
| 98 |
+
# Clear pip and npm caches
|
| 99 |
+
pip cache purge || true
|
| 100 |
+
sudo npm cache clean --force || true
|
| 101 |
+
|
| 102 |
+
- name: Check disk space after cleanup
|
| 103 |
+
run: df -h
|
| 104 |
+
|
| 105 |
+
- name: Install Python
|
| 106 |
+
uses: actions/setup-python@v5
|
| 107 |
+
with:
|
| 108 |
+
python-version: ${{ matrix.python }}
|
| 109 |
+
|
| 110 |
+
- name: Install NeMo
|
| 111 |
+
env:
|
| 112 |
+
INSTALLER: ${{ matrix.installer }}
|
| 113 |
+
run: |
|
| 114 |
+
if [ "$INSTALLER" = "pip-install" ]; then
|
| 115 |
+
pip install --no-cache-dir --upgrade pip
|
| 116 |
+
pip install --no-cache-dir ".[all]"
|
| 117 |
+
else
|
| 118 |
+
export INSTALL_DIR=$(pwd)
|
| 119 |
+
bash docker/common/install_dep.sh --library "te,mcore,extra" --mode install
|
| 120 |
+
pip install --no-cache-dir ".[all]"
|
| 121 |
+
fi
|
| 122 |
+
|
| 123 |
+
- name: Check disk space after installation
|
| 124 |
+
run: df -h
|
| 125 |
+
|
| 126 |
+
- name: Run import checks
|
| 127 |
+
run: |
|
| 128 |
+
# Run import checks
|
| 129 |
+
for collection in "asr" "tts" "lightning" "core"; do
|
| 130 |
+
python tests/core_ptl/check_imports.py --domain "$collection"
|
| 131 |
+
done
|
| 132 |
+
|
| 133 |
+
test-asr-install-linux-amd:
|
| 134 |
+
name: ubuntu-22.04-amd-py${{ matrix.python }}-asr
|
| 135 |
+
runs-on: ubuntu-22.04
|
| 136 |
+
strategy:
|
| 137 |
+
fail-fast: false
|
| 138 |
+
matrix:
|
| 139 |
+
python: ["3.10", "3.11", "3.12"]
|
| 140 |
+
steps:
|
| 141 |
+
- name: Checkout repo
|
| 142 |
+
uses: actions/checkout@v2
|
| 143 |
+
|
| 144 |
+
- name: Check disk space before cleanup
|
| 145 |
+
run: df -h
|
| 146 |
+
|
| 147 |
+
- name: Free up disk space
|
| 148 |
+
run: |
|
| 149 |
+
# Remove unnecessary packages and files on Ubuntu
|
| 150 |
+
sudo apt-get clean
|
| 151 |
+
sudo rm -rf /usr/local/lib/android || true
|
| 152 |
+
sudo rm -rf /opt/ghc || true
|
| 153 |
+
sudo rm -rf /usr/local/.ghcup || true
|
| 154 |
+
sudo rm -rf /usr/share/dotnet || true
|
| 155 |
+
sudo rm -rf /opt/az || true
|
| 156 |
+
# Clear pip and npm caches
|
| 157 |
+
pip cache purge || true
|
| 158 |
+
sudo npm cache clean --force || true
|
| 159 |
+
|
| 160 |
+
- name: Check disk space after cleanup
|
| 161 |
+
run: df -h
|
| 162 |
+
|
| 163 |
+
- name: Install Python
|
| 164 |
+
uses: actions/setup-python@v5
|
| 165 |
+
with:
|
| 166 |
+
python-version: ${{ matrix.python }}
|
| 167 |
+
|
| 168 |
+
- name: Install NeMo
|
| 169 |
+
run: |
|
| 170 |
+
pip install --no-cache-dir --upgrade pip
|
| 171 |
+
pip install --no-cache-dir ".[asr]"
|
| 172 |
+
|
| 173 |
+
- name: Check disk space after installation
|
| 174 |
+
run: df -h
|
| 175 |
+
|
| 176 |
+
- name: Run import checks
|
| 177 |
+
run: |
|
| 178 |
+
# Run import checks
|
| 179 |
+
python tests/core_ptl/check_imports.py --domain asr
|
| 180 |
+
|
| 181 |
+
test-installs-linux-arm:
|
| 182 |
+
name: ubuntu-22.04-arm-py${{ matrix.python }}-${{ matrix.installer }}
|
| 183 |
+
runs-on: ubuntu-22.04-arm
|
| 184 |
+
strategy:
|
| 185 |
+
fail-fast: false
|
| 186 |
+
matrix:
|
| 187 |
+
python: ["3.10", "3.11", "3.12"]
|
| 188 |
+
installer: ["pip-install", "nemo-install"]
|
| 189 |
+
steps:
|
| 190 |
+
- name: Checkout repo
|
| 191 |
+
uses: actions/checkout@v2
|
| 192 |
+
|
| 193 |
+
- name: Check disk space before cleanup
|
| 194 |
+
run: df -h
|
| 195 |
+
|
| 196 |
+
- name: Free up disk space
|
| 197 |
+
run: |
|
| 198 |
+
# Remove unnecessary packages and files on Ubuntu ARM
|
| 199 |
+
sudo apt-get clean
|
| 200 |
+
sudo rm -rf /usr/local/lib/android || true
|
| 201 |
+
sudo rm -rf /opt/ghc || true
|
| 202 |
+
sudo rm -rf /usr/local/.ghcup || true
|
| 203 |
+
sudo rm -rf /usr/share/dotnet || true
|
| 204 |
+
sudo rm -rf /opt/az || true
|
| 205 |
+
# Clear pip and npm caches
|
| 206 |
+
pip cache purge || true
|
| 207 |
+
sudo npm cache clean --force || true
|
| 208 |
+
|
| 209 |
+
- name: Check disk space after cleanup
|
| 210 |
+
run: df -h
|
| 211 |
+
|
| 212 |
+
- name: Install Python
|
| 213 |
+
uses: actions/setup-python@v5
|
| 214 |
+
with:
|
| 215 |
+
python-version: ${{ matrix.python }}
|
| 216 |
+
|
| 217 |
+
- name: Install NeMo
|
| 218 |
+
env:
|
| 219 |
+
INSTALLER: ${{ matrix.installer }}
|
| 220 |
+
run: |
|
| 221 |
+
if [ "$INSTALLER" = "pip-install" ]; then
|
| 222 |
+
pip install --no-cache-dir --upgrade pip
|
| 223 |
+
pip install --no-cache-dir ".[all]"
|
| 224 |
+
else
|
| 225 |
+
export INSTALL_DIR=$(pwd)
|
| 226 |
+
bash docker/common/install_dep.sh --library "te,mcore,extra" --mode install
|
| 227 |
+
pip install --no-cache-dir ".[all]"
|
| 228 |
+
fi
|
| 229 |
+
|
| 230 |
+
- name: Check disk space after installation
|
| 231 |
+
run: df -h
|
| 232 |
+
|
| 233 |
+
- name: Run import checks
|
| 234 |
+
run: |
|
| 235 |
+
# Run import checks
|
| 236 |
+
for collection in "asr" "tts" "lightning" "core"; do
|
| 237 |
+
python tests/core_ptl/check_imports.py --domain "$collection"
|
| 238 |
+
done
|
| 239 |
+
|
| 240 |
+
test-asr-installs-linux-arm:
|
| 241 |
+
name: ubuntu-22.04-arm-py${{ matrix.python }}-asr
|
| 242 |
+
runs-on: ubuntu-22.04-arm
|
| 243 |
+
strategy:
|
| 244 |
+
fail-fast: false
|
| 245 |
+
matrix:
|
| 246 |
+
python: ["3.10", "3.11", "3.12"]
|
| 247 |
+
steps:
|
| 248 |
+
- name: Checkout repo
|
| 249 |
+
uses: actions/checkout@v2
|
| 250 |
+
|
| 251 |
+
- name: Check disk space before cleanup
|
| 252 |
+
run: df -h
|
| 253 |
+
|
| 254 |
+
- name: Free up disk space
|
| 255 |
+
run: |
|
| 256 |
+
# Remove unnecessary packages and files on Ubuntu ARM
|
| 257 |
+
sudo apt-get clean
|
| 258 |
+
sudo rm -rf /usr/local/lib/android || true
|
| 259 |
+
sudo rm -rf /opt/ghc || true
|
| 260 |
+
sudo rm -rf /usr/local/.ghcup || true
|
| 261 |
+
sudo rm -rf /usr/share/dotnet || true
|
| 262 |
+
sudo rm -rf /opt/az || true
|
| 263 |
+
# Clear pip and npm caches
|
| 264 |
+
pip cache purge || true
|
| 265 |
+
sudo npm cache clean --force || true
|
| 266 |
+
|
| 267 |
+
- name: Check disk space after cleanup
|
| 268 |
+
run: df -h
|
| 269 |
+
|
| 270 |
+
- name: Install Python
|
| 271 |
+
uses: actions/setup-python@v5
|
| 272 |
+
with:
|
| 273 |
+
python-version: ${{ matrix.python }}
|
| 274 |
+
|
| 275 |
+
- name: Install NeMo
|
| 276 |
+
run: |
|
| 277 |
+
pip install --no-cache-dir --upgrade pip
|
| 278 |
+
pip install --no-cache-dir ".[asr]"
|
| 279 |
+
|
| 280 |
+
- name: Check disk space after installation
|
| 281 |
+
run: df -h
|
| 282 |
+
|
| 283 |
+
- name: Run import checks
|
| 284 |
+
run: |
|
| 285 |
+
# Run import checks
|
| 286 |
+
python tests/core_ptl/check_imports.py --domain asr
|
.github/workflows/labeler.yml
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: "Pull Request Labeler"
|
| 2 |
+
on:
|
| 3 |
+
- pull_request_target
|
| 4 |
+
|
| 5 |
+
jobs:
|
| 6 |
+
triage:
|
| 7 |
+
permissions:
|
| 8 |
+
contents: read
|
| 9 |
+
pull-requests: write
|
| 10 |
+
runs-on: ubuntu-latest
|
| 11 |
+
steps:
|
| 12 |
+
- uses: actions/labeler@v4
|
| 13 |
+
with:
|
| 14 |
+
repo-token: "${{ secrets.GITHUB_TOKEN }}"
|
.github/workflows/mcore-tag-bump-bot.yml
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Runs the Megatron-Core tag bump (_bump_mcore_tag.yml) daily for main and for the latest release branches
|
| 2 |
+
name: Megatron Tag Bump Bot
|
| 3 |
+
on:
|
| 4 |
+
workflow_dispatch:
|
| 5 |
+
schedule:
|
| 6 |
+
- cron: 0 0 * * *
|
| 7 |
+
|
| 8 |
+
jobs:
|
| 9 |
+
get-release-branch-names:
|
| 10 |
+
runs-on: ubuntu-latest
|
| 11 |
+
outputs:
|
| 12 |
+
mcore: ${{ steps.get-branch.outputs.mcore_release_branch }}
|
| 13 |
+
nemo: ${{ steps.get-branch.outputs.nemo_release_branch }}
|
| 14 |
+
steps:
|
| 15 |
+
- name: Get release branch names
|
| 16 |
+
id: get-branch
|
| 17 |
+
run: |
|
| 18 |
+
latest_branch=$(git ls-remote --heads https://github.com/NVIDIA/Megatron-LM.git 'refs/heads/core_r*' |
|
| 19 |
+
grep -o 'core_r[0-9]\+\.[0-9]\+\.[0-9]\+' |
|
| 20 |
+
sort -V |
|
| 21 |
+
tail -n1)
|
| 22 |
+
echo "mcore_release_branch=$latest_branch" >> $GITHUB_OUTPUT
|
| 23 |
+
|
| 24 |
+
latest_branch=$(git ls-remote --heads https://github.com/NVIDIA/NeMo.git 'refs/heads/r*' |
|
| 25 |
+
grep -o 'r[0-9]\+\.[0-9]\+\.[0-9]\+' |
|
| 26 |
+
sort -V |
|
| 27 |
+
tail -n1)
|
| 28 |
+
echo "nemo_release_branch=$latest_branch" >> $GITHUB_OUTPUT
|
| 29 |
+
|
| 30 |
+
bump-tags:
|
| 31 |
+
needs: [get-release-branch-names]
|
| 32 |
+
strategy:
|
| 33 |
+
fail-fast: false
|
| 34 |
+
matrix:
|
| 35 |
+
include:
|
| 36 |
+
- nemo-target-branch: ${{ needs.get-release-branch-names.outputs.nemo }}
|
| 37 |
+
mcore-target-branch: ${{ needs.get-release-branch-names.outputs.mcore }}
|
| 38 |
+
- nemo-target-branch: main
|
| 39 |
+
mcore-target-branch: main
|
| 40 |
+
uses: ./.github/workflows/_bump_mcore_tag.yml
|
| 41 |
+
with:
|
| 42 |
+
nemo-target-branch: ${{ matrix.nemo-target-branch }}
|
| 43 |
+
mcore-target-branch: ${{ matrix.mcore-target-branch }}
|
| 44 |
+
secrets:
|
| 45 |
+
PAT: ${{ secrets.PAT }}
|
| 46 |
+
|
| 47 |
+
notify:
|
| 48 |
+
if: failure()
|
| 49 |
+
runs-on: ubuntu-latest
|
| 50 |
+
needs: [bump-tags]
|
| 51 |
+
steps:
|
| 52 |
+
- name: Notify
|
| 53 |
+
env:
|
| 54 |
+
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
| 55 |
+
SLACK_WEBHOOK_ADMIN: <!subteam^${{ secrets.SLACK_WEBHOOK_ADMIN }}>
|
| 56 |
+
GITHUB_RUN_ID: ${{ github.run_id }}
|
| 57 |
+
GITHUB_REPOSITORY: ${{ github.repository }}
|
| 58 |
+
run: |
|
| 59 |
+
curl -X POST \
|
| 60 |
+
-H 'Content-type: application/json' \
|
| 61 |
+
--data "{\"text\":\":robot_joy: <https://github.com/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}|Mcore-bump-bot workflow> failed. Please fix manually.\n\ncc ${SLACK_WEBHOOK_ADMIN}\"}" \
|
| 62 |
+
$SLACK_WEBHOOK
|
.github/workflows/monitor-single-vm.yml
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: ~shut down a single VM
|
| 2 |
+
|
| 3 |
+
on:
|
| 4 |
+
workflow_call:
|
| 5 |
+
inputs:
|
| 6 |
+
vm:
|
| 7 |
+
type: string
|
| 8 |
+
description: Name of VM
|
| 9 |
+
required: true
|
| 10 |
+
n_gpus:
|
| 11 |
+
type: string
|
| 12 |
+
description: Number of GPUs this VM has
|
| 13 |
+
required: true
|
| 14 |
+
|
| 15 |
+
jobs:
|
| 16 |
+
check-status-and-maybe-shutdown:
|
| 17 |
+
environment: main
|
| 18 |
+
runs-on: ${{ inputs.vm }}
|
| 19 |
+
outputs:
|
| 20 |
+
status: ${{ steps.status.outputs.main }}
|
| 21 |
+
steps:
|
| 22 |
+
- name: Check status
|
| 23 |
+
id: status
|
| 24 |
+
run: |
|
| 25 |
+
docker run --rm --runtime=nvidia --gpus ${{ inputs.n_gpus }} ubuntu nvidia-smi
|
| 26 |
+
|
| 27 |
+
NUM_GPUS=$(nvidia-smi --query-gpu=name --format=csv,noheader | wc -l)
|
| 28 |
+
|
| 29 |
+
if [[ $NUM_GPUS -ne ${{ inputs.n_gpus }} ]]; then
|
| 30 |
+
echo "Issues with GPU detected, will take this runner offline."
|
| 31 |
+
echo "main=degraded" >> "$GITHUB_OUTPUT"
|
| 32 |
+
else
|
| 33 |
+
echo "main=healthy" >> "$GITHUB_OUTPUT"
|
| 34 |
+
fi
|
| 35 |
+
|
| 36 |
+
- name: Send Slack message & Disconnect runner from GitHub
|
| 37 |
+
if: ${{ steps.status.outputs.main == 'degraded' || failure() }}
|
| 38 |
+
run: |
|
| 39 |
+
MESSAGE='{
|
| 40 |
+
"blocks": [
|
| 41 |
+
{
|
| 42 |
+
"type": "section",
|
| 43 |
+
"text": {
|
| 44 |
+
"type": "mrkdwn",
|
| 45 |
+
"text": ":alert: VM bot 🤖: Hey <!subteam^${{ secrets.SLACK_WEBHOOK_ADMIN }}>: VM `${{ inputs.vm }}` is having not the best day of their life, maybe bring them an apple or so."
|
| 46 |
+
}
|
| 47 |
+
}
|
| 48 |
+
]
|
| 49 |
+
}'
|
| 50 |
+
|
| 51 |
+
curl -X POST -H "Content-type: application/json" --data "$MESSAGE" ${{ secrets.SLACK_WEBHOOK }}
|
| 52 |
+
|
| 53 |
+
cd /home/azureuser/actions-runner
|
| 54 |
+
echo ${{ secrets.VM_KEY }} | sudo -S ./svc.sh stop
|
.github/workflows/monitor-vms.yml
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Every 15 minutes, lists the online non-CPU self-hosted runners and runs the single-VM health check on each
|
| 2 |
+
name: Reboots VMs in a controlled way
|
| 3 |
+
on:
|
| 4 |
+
schedule:
|
| 5 |
+
- cron: 0/15 * * * *
|
| 6 |
+
workflow_dispatch:
|
| 7 |
+
|
| 8 |
+
jobs:
|
| 9 |
+
pre-flight:
|
| 10 |
+
runs-on: ubuntu-latest
|
| 11 |
+
if: github.repository_owner == 'NVIDIA'
|
| 12 |
+
outputs:
|
| 13 |
+
list-of-vms: ${{ steps.main.outputs.main }}
|
| 14 |
+
environment: main
|
| 15 |
+
steps:
|
| 16 |
+
- name: Get list of VMs
|
| 17 |
+
id: main
|
| 18 |
+
env:
|
| 19 |
+
GITHUB_TOKEN: ${{ secrets.PAT }}
|
| 20 |
+
run: |
|
| 21 |
+
RUNNERS=$(curl -L \
|
| 22 |
+
-H "Accept: application/vnd.github+json" \
|
| 23 |
+
-H "Authorization: Bearer $GITHUB_TOKEN" \
|
| 24 |
+
-H "X-GitHub-Api-Version: 2022-11-28" \
|
| 25 |
+
https://api.github.com/repos/NVIDIA/NeMo/actions/runners)
|
| 26 |
+
|
| 27 |
+
MATRIX=$(echo $RUNNERS \
|
| 28 |
+
| jq -c '[
|
| 29 |
+
.runners[]
|
| 30 |
+
| select(.status == "online")
|
| 31 |
+
| select(.name | contains("cpu") | not)
|
| 32 |
+
| {
|
| 33 |
+
"vm": .name,
|
| 34 |
+
"n_gpus": [
|
| 35 |
+
.labels[]
|
| 36 |
+
| select(.name | endswith("gpu")) | .name
|
| 37 |
+
][0][:1]
|
| 38 |
+
}
|
| 39 |
+
]
|
| 40 |
+
'
|
| 41 |
+
)
|
| 42 |
+
echo main=$MATRIX | tee -a "$GITHUB_OUTPUT"
|
| 43 |
+
|
| 44 |
+
maintenance:
|
| 45 |
+
needs: pre-flight
|
| 46 |
+
strategy:
|
| 47 |
+
fail-fast: false
|
| 48 |
+
matrix:
|
| 49 |
+
include: ${{ fromJSON(needs.pre-flight.outputs.list-of-vms )}}
|
| 50 |
+
uses: ./.github/workflows/monitor-single-vm.yml
|
| 51 |
+
with:
|
| 52 |
+
vm: ${{ matrix.vm }}
|
| 53 |
+
n_gpus: ${{ matrix.n_gpus }}
|
| 54 |
+
secrets: inherit # pragma: allowlist secret
|
.github/workflows/release-freeze.yml
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: "Code freeze"
|
| 2 |
+
|
| 3 |
+
on:
|
| 4 |
+
workflow_dispatch:
|
| 5 |
+
inputs:
|
| 6 |
+
type_of_release:
|
| 7 |
+
type: choice
|
| 8 |
+
description: Type of release
|
| 9 |
+
options:
|
| 10 |
+
- major
|
| 11 |
+
- minor
|
| 12 |
+
freeze-commit:
|
| 13 |
+
type: string
|
| 14 |
+
description: Commit SHA to use for cut-off
|
| 15 |
+
required: false
|
| 16 |
+
default: main
|
| 17 |
+
mcore_version:
|
| 18 |
+
description: "Version of MCore to use (must be a valid git ref)"
|
| 19 |
+
required: true
|
| 20 |
+
type: string
|
| 21 |
+
dry-run:
|
| 22 |
+
type: boolean
|
| 23 |
+
description: Dry-run of code-freeze
|
| 24 |
+
required: false
|
| 25 |
+
default: true
|
| 26 |
+
|
| 27 |
+
jobs:
|
| 28 |
+
code-freeze:
|
| 29 |
+
uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/[email protected]
|
| 30 |
+
with:
|
| 31 |
+
library-name: NeMo-Toolkit
|
| 32 |
+
python-package: nemo
|
| 33 |
+
release-type: ${{ inputs.type_of_release }}
|
| 34 |
+
freeze-commit: ${{ inputs.freeze-commit }}
|
| 35 |
+
dry-run: ${{ inputs.dry-run }}
|
| 36 |
+
use-pat: true
|
| 37 |
+
secrets:
|
| 38 |
+
SLACK_WEBHOOK: ${{ secrets.SLACK_RELEASE_ENDPOINT }}
|
| 39 |
+
SLACK_WEBHOOK_ADMIN: ${{ secrets.SLACK_WEBHOOK_ADMIN }}
|
| 40 |
+
PAT: ${{ secrets.PAT }}
|
| 41 |
+
|
| 42 |
+
freeze-tags:
|
| 43 |
+
runs-on: ubuntu-latest
|
| 44 |
+
needs: [code-freeze]
|
| 45 |
+
environment: main
|
| 46 |
+
steps:
|
| 47 |
+
- name: Checkout repository
|
| 48 |
+
uses: actions/checkout@v4
|
| 49 |
+
with:
|
| 50 |
+
path: ${{ github.run_id }}
|
| 51 |
+
token: ${{ secrets.PAT }}
|
| 52 |
+
fetch-depth: 0
|
| 53 |
+
fetch-tags: true
|
| 54 |
+
ref: ${{ inputs.dry-run == true && inputs.freeze-commit || needs.code-freeze.outputs.release-branch }}
|
| 55 |
+
|
| 56 |
+
- name: Pin branch name in Notebooks
|
| 57 |
+
run: |
|
| 58 |
+
cd ${{ github.run_id }}
|
| 59 |
+
find tutorials -type f -name "*.ipynb" -exec sed -i "s/BRANCH = 'main'/BRANCH = '${{ needs.code-freeze.outputs.release-branch }}'/g" {} +
|
| 60 |
+
|
| 61 |
+
- name: Pin MCore in Dockerfile
|
| 62 |
+
run: |
|
| 63 |
+
cd ${{ github.run_id }}
|
| 64 |
+
sed -i 's/^ARG MCORE_TAG=.*$/ARG MCORE_TAG=${{ inputs.mcore_version }}/' docker/Dockerfile.ci
|
| 65 |
+
|
| 66 |
+
- name: Show status
|
| 67 |
+
run: |
|
| 68 |
+
cd ${{ github.run_id }}
|
| 69 |
+
git status
|
| 70 |
+
|
| 71 |
+
- name: Create PR
|
| 72 |
+
uses: peter-evans/create-pull-request@v6
|
| 73 |
+
id: create-pull-request
|
| 74 |
+
if: ${{ inputs.dry-run != true }}
|
| 75 |
+
with:
|
| 76 |
+
path: ${{ github.run_id }}
|
| 77 |
+
base: ${{ needs.code-freeze.outputs.release-branch }}
|
| 78 |
+
branch: ci/freeze-tags-${{ needs.code-freeze.outputs.release-branch }}
|
| 79 |
+
title: "Freeze tags in `${{ needs.code-freeze.outputs.release-branch }}`"
|
| 80 |
+
body: |
|
| 81 |
+
🚀 PR to freeze tags in `${{ needs.code-freeze.outputs.release-branch }}`.
|
| 82 |
+
|
| 83 |
+
commit-message: "[🤠]: Howdy folks, let's release NeMo `${{ needs.code-freeze.outputs.release-branch }}` !"
|
| 84 |
+
signoff: true
|
| 85 |
+
assignees: okoenig
|
.github/workflows/release.yml
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2020-2021, NVIDIA CORPORATION.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
name: "Release Neural Modules"
|
| 15 |
+
|
| 16 |
+
on:
|
| 17 |
+
workflow_dispatch:
|
| 18 |
+
inputs:
|
| 19 |
+
release-ref:
|
| 20 |
+
description: Ref (SHA or branch name) to release
|
| 21 |
+
required: true
|
| 22 |
+
type: string
|
| 23 |
+
version-bump-branch:
|
| 24 |
+
description: Branch for version bump
|
| 25 |
+
required: true
|
| 26 |
+
type: string
|
| 27 |
+
dry-run:
|
| 28 |
+
description: Do not publish a wheel and GitHub release.
|
| 29 |
+
required: true
|
| 30 |
+
default: true
|
| 31 |
+
type: boolean
|
| 32 |
+
|
| 33 |
+
jobs:
|
| 34 |
+
release:
|
| 35 |
+
uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/[email protected]
|
| 36 |
+
with:
|
| 37 |
+
release-ref: ${{ inputs.release-ref }}
|
| 38 |
+
python-package: nemo
|
| 39 |
+
python-version: "3.10"
|
| 40 |
+
library-name: Neural Modules
|
| 41 |
+
dry-run: ${{ inputs.dry-run }}
|
| 42 |
+
version-bump-branch: ${{ inputs.version-bump-branch }}
|
| 43 |
+
secrets:
|
| 44 |
+
TWINE_USERNAME: ${{ secrets.TWINE_USERNAME }}
|
| 45 |
+
TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD }}
|
| 46 |
+
SLACK_WEBHOOK_ADMIN: ${{ secrets.SLACK_WEBHOOK_ADMIN }}
|
| 47 |
+
SLACK_WEBHOOK: ${{ secrets.SLACK_RELEASE_ENDPOINT }}
|
| 48 |
+
PAT: ${{ secrets.PAT }}
|
.github/workflows/secrets-detector.yml
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2020-2021, NVIDIA CORPORATION.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
name: Secrets detector
|
| 15 |
+
|
| 16 |
+
on:
|
| 17 |
+
pull_request_target:
|
| 18 |
+
branches:
|
| 19 |
+
- 'main'
|
| 20 |
+
|
| 21 |
+
jobs:
|
| 22 |
+
main:
|
| 23 |
+
runs-on: ubuntu-latest
|
| 24 |
+
steps:
|
| 25 |
+
- name: Checkout repository
|
| 26 |
+
uses: actions/checkout@v4
|
| 27 |
+
with:
|
| 28 |
+
fetch-depth: 0
|
| 29 |
+
token: ${{ secrets.NEMO_REFORMAT_TOKEN }}
|
| 30 |
+
|
| 31 |
+
- name: Install secrets detector
|
| 32 |
+
run: pip install detect-secrets
|
| 33 |
+
|
| 34 |
+
- name: Run on change-set
|
| 35 |
+
run: |
|
| 36 |
+
git diff --name-only --diff-filter=d --merge-base origin/main -z | xargs -0 detect-secrets-hook --disable-plugin HexHighEntropyString --baseline .secrets.baseline
|
| 37 |
+
|
| 38 |
+
- uses: EndBug/add-and-commit@v9
|
| 39 |
+
# Commit changes. Nothing is committed if no changes.
|
| 40 |
+
if: always()
|
| 41 |
+
with:
|
| 42 |
+
message: Update baseline
|
| 43 |
+
commit: --signoff
|
.github/workflows/update-buildcache.yml
ADDED
|
@@ -0,0 +1,110 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2020-2021, NVIDIA CORPORATION.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
name: Update build cache
|
| 15 |
+
on:
|
| 16 |
+
schedule:
|
| 17 |
+
- cron: 0 0 * * *
|
| 18 |
+
push:
|
| 19 |
+
branches:
|
| 20 |
+
- main
|
| 21 |
+
workflow_dispatch:
|
| 22 |
+
inputs:
|
| 23 |
+
runner:
|
| 24 |
+
required: false
|
| 25 |
+
default: self-hosted-azure-builder
|
| 26 |
+
type: string
|
| 27 |
+
description: VM to use for build
|
| 28 |
+
|
| 29 |
+
jobs:
|
| 30 |
+
pre-flight:
|
| 31 |
+
runs-on: ubuntu-latest
|
| 32 |
+
outputs:
|
| 33 |
+
build_args: ${{ steps.manifest.outputs.BUILD_ARGS }}
|
| 34 |
+
cache-from: ${{ steps.cache_from.outputs.LAST_PRS }}
|
| 35 |
+
steps:
|
| 36 |
+
- name: Checkout branch
|
| 37 |
+
uses: actions/checkout@v4
|
| 38 |
+
|
| 39 |
+
- name: Parse manifest.json
|
| 40 |
+
id: manifest
|
| 41 |
+
run: |
|
| 42 |
+
BUILD_ARGS=$(cat << EOF
|
| 43 |
+
BASE_IMAGE=$(cat requirements/manifest.json | jq -r '."ngc-pytorch"')
|
| 44 |
+
TRTLLM_REPO=$(cat requirements/manifest.json | jq -r '."vcs-dependencies"."trt-llm".repo')
|
| 45 |
+
TRTLLM_TAG=$(cat requirements/manifest.json | jq -r '."vcs-dependencies"."trt-llm".ref')
|
| 46 |
+
MLM_REPO=$(cat requirements/manifest.json | jq -r '."vcs-dependencies"."megatron-lm".repo')
|
| 47 |
+
MLM_TAG=$(cat requirements/manifest.json | jq -r '."vcs-dependencies"."megatron-lm".ref')
|
| 48 |
+
TE_REPO=$(cat requirements/manifest.json | jq -r '."vcs-dependencies".transformer_engine.repo')
|
| 49 |
+
TE_TAG=$(cat requirements/manifest.json | jq -r '."vcs-dependencies".transformer_engine.ref')
|
| 50 |
+
APEX_REPO=$(cat requirements/manifest.json | jq -r '."vcs-dependencies".apex.repo')
|
| 51 |
+
APEX_TAG=$(cat requirements/manifest.json | jq -r '."vcs-dependencies".apex.ref')
|
| 52 |
+
EOF
|
| 53 |
+
)
|
| 54 |
+
|
| 55 |
+
echo "BUILD_ARGS<<EOF" >> $GITHUB_OUTPUT
|
| 56 |
+
echo "$BUILD_ARGS" >> $GITHUB_OUTPUT
|
| 57 |
+
echo "EOF" >> $GITHUB_OUTPUT
|
| 58 |
+
|
| 59 |
+
- name: Get last merged PR
|
| 60 |
+
id: cache_from
|
| 61 |
+
env:
|
| 62 |
+
GH_TOKEN: ${{ github.token }}
|
| 63 |
+
run: |
|
| 64 |
+
LAST_PRS=$(gh api graphql -f query='
|
| 65 |
+
query {
|
| 66 |
+
repository(owner: "NVIDIA", name: "NeMo") {
|
| 67 |
+
pullRequests(states: MERGED, first: 100, orderBy: {field: UPDATED_AT, direction: DESC}) {
|
| 68 |
+
nodes {
|
| 69 |
+
number
|
| 70 |
+
}
|
| 71 |
+
}
|
| 72 |
+
}
|
| 73 |
+
}' | jq -r '.data.repository.pullRequests.nodes[].number' | while read -r number; do
|
| 74 |
+
echo "nemoci.azurecr.io/nemo_container-buildcache:$number"
|
| 75 |
+
done)
|
| 76 |
+
|
| 77 |
+
echo "LAST_PRS<<EOF" >> $GITHUB_OUTPUT
|
| 78 |
+
echo "$LAST_PRS" >> $GITHUB_OUTPUT
|
| 79 |
+
echo "EOF" >> $GITHUB_OUTPUT
|
| 80 |
+
|
| 81 |
+
cicd-test-container-build:
|
| 82 |
+
needs: [pre-flight]
|
| 83 |
+
uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/[email protected]
|
| 84 |
+
strategy:
|
| 85 |
+
fail-fast: false
|
| 86 |
+
matrix:
|
| 87 |
+
include:
|
| 88 |
+
- dockerfile: docker/Dockerfile.ci
|
| 89 |
+
image-name: nemo_container_automodel
|
| 90 |
+
- dockerfile: docker/Dockerfile.ci
|
| 91 |
+
image-name: nemo_container_nemo2
|
| 92 |
+
- dockerfile: docker/Dockerfile.ci
|
| 93 |
+
image-name: nemo_container_speech
|
| 94 |
+
- dockerfile: docker/Dockerfile.ci
|
| 95 |
+
image-name: nemo_container
|
| 96 |
+
with:
|
| 97 |
+
image-name: ${{ matrix.image-name }}
|
| 98 |
+
dockerfile: ${{ matrix.dockerfile }}
|
| 99 |
+
image-label: nemo-core
|
| 100 |
+
build-args: |
|
| 101 |
+
IMAGE_LABEL=nemo-core
|
| 102 |
+
NEMO_TAG=${{ github.sha }}
|
| 103 |
+
NEMO_REPO=https://github.com/NVIDIA/NeMo
|
| 104 |
+
${{ needs.pre-flight.outputs.BUILD_ARGS }}
|
| 105 |
+
runner: ${{ inputs.runner || 'self-hosted-azure-builder' }}
|
| 106 |
+
use-inline-cache: false
|
| 107 |
+
prune-filter-timerange: 24h
|
| 108 |
+
cache-from: |
|
| 109 |
+
nemoci.azurecr.io/${{ matrix.image-name }}-buildcache:main
|
| 110 |
+
${{ needs.pre-flight.outputs.cache-from }}
|