Spaces:
Sleeping
Sleeping
@startuml
' Sequence diagram of the message flow between Coordinator, Learner,
' Collector, Middleware and Operator. It shows four phases:
'   1. start  - the Coordinator boots its own modules and connects peers
'   2. learn(async) - the Coordinator drives the Learner through tasks
'   3. data collection/evaluation(async) - the Coordinator drives the Collector
'   4. close  - connections are torn down
' Phases 2 and 3 are drawn inside one enclosing "loop" frame.
' A bare "autonumber" restarts message numbering, so each phase below is
' numbered independently.

skinparam NoteBackgroundColor PapayaWhip
autonumber

participant Coordinator
participant Learner
participant Collector
participant Middleware
participant Operator

' ---- Phase 1: start ------------------------------------------------------
group start
    Coordinator->Coordinator: start communication module
    Coordinator->Coordinator: start commander
    Coordinator->Coordinator: start replay buffer
    Coordinator->Operator: connect operator
    Operator->Coordinator: send collector/learner info
    Coordinator->Learner: create connection
    Coordinator->Collector: create connection
' closes "start"
end

loop
    ' ---- Phase 2: learn(async) -------------------------------------------
    ' restart numbering for this phase
    autonumber
    group learn(async)
        Coordinator->Learner: request learner start task
        note right
            policy config
            learner config
        end note
        Learner->Coordinator: return learner start info

        group learner loop
            Coordinator->Learner: request data demand task
            Learner->Coordinator: return data demand
            Coordinator->Learner: request learn task and send data(metadata)
            note right
                data path
                data priority
            end note
            ' step data and policy info are exchanged with the Middleware,
            ' while the Coordinator messages carry tasks, metadata and info
            Middleware->Learner: load data(stepdata)
            Learner->Learner: learn an iteration
            Learner->Middleware: send policy info
            note left
                model state_dict
                model hyper-parameter
            end note
            Learner->Coordinator: return learn info
            note right
                policy meta
                train stat
                data priority
            end note
        ' closes "learner loop"
        end

        Coordinator->Learner: request learner close task
        Learner->Coordinator: return learner close info
        note right
            save final policy
        end note
    ' closes "learn(async)"
    end

    ' ---- Phase 3: data collection/evaluation(async) ----------------------
    ' restart numbering for this phase
    autonumber
    group data collection/evaluation(async)
        Coordinator->Collector: request collector start task
        note right
            policy meta
            env config
            collector config
        end note
        Collector->Coordinator: return collector start info
        Middleware->Collector: load policy info for init

        group collector loop
            Coordinator->Collector: request get data task
            Collector->Collector: policy interact with env
            Collector->Middleware: send data(stepdata)
            Collector->Coordinator: return data(metadata)
            note right
                data path
                data length(rollout length)
            end note
            Middleware->Collector: load policy info for update
        ' closes "collector loop"
        end

        Coordinator->Collector: request collector close task
        Collector->Coordinator: return collector close info
        note right
            episode result(cumulative reward)
            collector performance
        end note
    ' closes "data collection/evaluation(async)"
    end
' closes the enclosing "loop"
end

' ---- Phase 4: close ------------------------------------------------------
' restart numbering for this phase
autonumber
group close
    Coordinator->Learner: destroy connection
    Coordinator->Collector: destroy connection
    Coordinator->Operator: disconnect operator
    Coordinator->Coordinator: close
' closes "close"
end
@enduml