Reinforcement Learning
ml-agents
TensorBoard
ONNX
deep-rl-class
mlagents
Pyramids
id: Pyramids_Success
Eval Results (legacy)
Instructions to use s1144662/ppo-Pyramids-v0 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- ml-agents
How to use s1144662/ppo-Pyramids-v0 with ml-agents:
mlagents-load-from-hf --repo-id="s1144662/ppo-Pyramids-v0" --local-dir="./downloads"
- Notebooks
- Google Colab
- Kaggle
| { | |
| "name": "root", | |
| "gauges": { | |
| "Pyramids.Policy.Entropy.mean": { | |
| "value": 0.4094105362892151, | |
| "min": 0.4094105362892151, | |
| "max": 1.403701901435852, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.Entropy.sum": { | |
| "value": 12321.619140625, | |
| "min": 12321.619140625, | |
| "max": 42582.69921875, | |
| "count": 33 | |
| }, | |
| "Pyramids.Step.mean": { | |
| "value": 989957.0, | |
| "min": 29890.0, | |
| "max": 989957.0, | |
| "count": 33 | |
| }, | |
| "Pyramids.Step.sum": { | |
| "value": 989957.0, | |
| "min": 29890.0, | |
| "max": 989957.0, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.ExtrinsicValueEstimate.mean": { | |
| "value": 0.3289995789527893, | |
| "min": -0.11905790120363235, | |
| "max": 0.4239726662635803, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.ExtrinsicValueEstimate.sum": { | |
| "value": 84.88188934326172, | |
| "min": -28.692955017089844, | |
| "max": 113.20069885253906, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.RndValueEstimate.mean": { | |
| "value": 0.014585393480956554, | |
| "min": 0.007604795973747969, | |
| "max": 0.5155928134918213, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.RndValueEstimate.sum": { | |
| "value": 3.763031482696533, | |
| "min": 2.0076661109924316, | |
| "max": 122.19550323486328, | |
| "count": 33 | |
| }, | |
| "Pyramids.Losses.PolicyLoss.mean": { | |
| "value": 0.06803671671548814, | |
| "min": 0.06438046189612665, | |
| "max": 0.0744788520660477, | |
| "count": 33 | |
| }, | |
| "Pyramids.Losses.PolicyLoss.sum": { | |
| "value": 0.9525140340168339, | |
| "min": 0.5136651244805992, | |
| "max": 1.0427039289246678, | |
| "count": 33 | |
| }, | |
| "Pyramids.Losses.ValueLoss.mean": { | |
| "value": 0.011091926316328767, | |
| "min": 0.001242224995313356, | |
| "max": 0.01267189582836627, | |
| "count": 33 | |
| }, | |
| "Pyramids.Losses.ValueLoss.sum": { | |
| "value": 0.15528696842860273, | |
| "min": 0.017391149934386983, | |
| "max": 0.1774065415971278, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.LearningRate.mean": { | |
| "value": 7.585126043085716e-06, | |
| "min": 7.585126043085716e-06, | |
| "max": 0.0002952355730167143, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.LearningRate.sum": { | |
| "value": 0.00010619176460320002, | |
| "min": 0.00010619176460320002, | |
| "max": 0.003508104230631999, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.Epsilon.mean": { | |
| "value": 0.10252834285714285, | |
| "min": 0.10252834285714285, | |
| "max": 0.1984118571428571, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.Epsilon.sum": { | |
| "value": 1.4353968, | |
| "min": 1.3888829999999999, | |
| "max": 2.5693680000000003, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.Beta.mean": { | |
| "value": 0.0002625814514285715, | |
| "min": 0.0002625814514285715, | |
| "max": 0.009841344528571428, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.Beta.sum": { | |
| "value": 0.003676140320000001, | |
| "min": 0.003676140320000001, | |
| "max": 0.11695986319999999, | |
| "count": 33 | |
| }, | |
| "Pyramids.Losses.RNDLoss.mean": { | |
| "value": 0.013016738928854465, | |
| "min": 0.013016738928854465, | |
| "max": 0.6105430722236633, | |
| "count": 33 | |
| }, | |
| "Pyramids.Losses.RNDLoss.sum": { | |
| "value": 0.18223434686660767, | |
| "min": 0.18223434686660767, | |
| "max": 4.273801326751709, | |
| "count": 33 | |
| }, | |
| "Pyramids.Environment.EpisodeLength.mean": { | |
| "value": 535.5102040816327, | |
| "min": 442.8985507246377, | |
| "max": 994.258064516129, | |
| "count": 33 | |
| }, | |
| "Pyramids.Environment.EpisodeLength.sum": { | |
| "value": 26240.0, | |
| "min": 16561.0, | |
| "max": 33384.0, | |
| "count": 33 | |
| }, | |
| "Pyramids.Environment.CumulativeReward.mean": { | |
| "value": 1.0936041483655572, | |
| "min": -0.8661032787734463, | |
| "max": 1.327961881482412, | |
| "count": 33 | |
| }, | |
| "Pyramids.Environment.CumulativeReward.sum": { | |
| "value": 52.492999121546745, | |
| "min": -29.415601775050163, | |
| "max": 92.80839824676514, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.ExtrinsicReward.mean": { | |
| "value": 1.0936041483655572, | |
| "min": -0.8661032787734463, | |
| "max": 1.327961881482412, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.ExtrinsicReward.sum": { | |
| "value": 52.492999121546745, | |
| "min": -29.415601775050163, | |
| "max": 92.80839824676514, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.RndReward.mean": { | |
| "value": 0.07007709181440684, | |
| "min": 0.0629829747005715, | |
| "max": 12.260135732152882, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.RndReward.sum": { | |
| "value": 3.363700407091528, | |
| "min": 3.363700407091528, | |
| "max": 208.422307446599, | |
| "count": 33 | |
| }, | |
| "Pyramids.IsTraining.mean": { | |
| "value": 1.0, | |
| "min": 1.0, | |
| "max": 1.0, | |
| "count": 33 | |
| }, | |
| "Pyramids.IsTraining.sum": { | |
| "value": 1.0, | |
| "min": 1.0, | |
| "max": 1.0, | |
| "count": 33 | |
| } | |
| }, | |
| "metadata": { | |
| "timer_format_version": "0.1.0", | |
| "start_time_seconds": "1767102534", | |
| "python_version": "3.9.12 (main, Apr 5 2022, 06:56:58) \n[GCC 7.5.0]", | |
| "command_line_arguments": "/content/miniconda/lib/python3.9/site-packages/mlagents/trainers/learn.py ./config/ppo/PyramidsRND.yaml --env=training-envs-executables/Pyramids/Pyramids --run-id=Pyramids_Success --no-graphics", | |
| "mlagents_version": "0.30.0", | |
| "mlagents_envs_version": "0.30.0", | |
| "communication_protocol_version": "1.5.0", | |
| "pytorch_version": "1.11.0+cu102", | |
| "numpy_version": "1.21.2", | |
| "end_time_seconds": "1767104751" | |
| }, | |
| "total": 2217.2442677070003, | |
| "count": 1, | |
| "self": 0.4851586620006856, | |
| "children": { | |
| "run_training.setup": { | |
| "total": 0.011539717000005112, | |
| "count": 1, | |
| "self": 0.011539717000005112 | |
| }, | |
| "TrainerController.start_learning": { | |
| "total": 2216.747569328, | |
| "count": 1, | |
| "self": 1.4415964610566334, | |
| "children": { | |
| "TrainerController._reset_env": { | |
| "total": 5.211652854000022, | |
| "count": 1, | |
| "self": 5.211652854000022 | |
| }, | |
| "TrainerController.advance": { | |
| "total": 2209.986772549943, | |
| "count": 63515, | |
| "self": 1.4909983129923603, | |
| "children": { | |
| "env_step": { | |
| "total": 1591.0018555429428, | |
| "count": 63515, | |
| "self": 1482.3926766258496, | |
| "children": { | |
| "SubprocessEnvManager._take_step": { | |
| "total": 107.7564419931175, | |
| "count": 63515, | |
| "self": 4.808155167134373, | |
| "children": { | |
| "TorchPolicy.evaluate": { | |
| "total": 102.94828682598313, | |
| "count": 62539, | |
| "self": 102.94828682598313 | |
| } | |
| } | |
| }, | |
| "workers": { | |
| "total": 0.8527369239757263, | |
| "count": 63515, | |
| "self": 0.0, | |
| "children": { | |
| "worker_root": { | |
| "total": 2210.4591264390133, | |
| "count": 63515, | |
| "is_parallel": true, | |
| "self": 849.8586977290076, | |
| "children": { | |
| "run_training.setup": { | |
| "total": 0.0, | |
| "count": 0, | |
| "is_parallel": true, | |
| "self": 0.0, | |
| "children": { | |
| "steps_from_proto": { | |
| "total": 0.001869432999910714, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.0005955710000762338, | |
| "children": { | |
| "_process_rank_one_or_two_observation": { | |
| "total": 0.0012738619998344802, | |
| "count": 8, | |
| "is_parallel": true, | |
| "self": 0.0012738619998344802 | |
| } | |
| } | |
| }, | |
| "UnityEnvironment.step": { | |
| "total": 0.06051289599986376, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.0007312429995636194, | |
| "children": { | |
| "UnityEnvironment._generate_step_input": { | |
| "total": 0.0005896570000913925, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.0005896570000913925 | |
| }, | |
| "communicator.exchange": { | |
| "total": 0.05677497500005302, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.05677497500005302 | |
| }, | |
| "steps_from_proto": { | |
| "total": 0.002417021000155728, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.0005348150002646435, | |
| "children": { | |
| "_process_rank_one_or_two_observation": { | |
| "total": 0.0018822059998910845, | |
| "count": 8, | |
| "is_parallel": true, | |
| "self": 0.0018822059998910845 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| }, | |
| "UnityEnvironment.step": { | |
| "total": 1360.6004287100056, | |
| "count": 63514, | |
| "is_parallel": true, | |
| "self": 36.158674087053214, | |
| "children": { | |
| "UnityEnvironment._generate_step_input": { | |
| "total": 25.767724397927623, | |
| "count": 63514, | |
| "is_parallel": true, | |
| "self": 25.767724397927623 | |
| }, | |
| "communicator.exchange": { | |
| "total": 1176.9477050809567, | |
| "count": 63514, | |
| "is_parallel": true, | |
| "self": 1176.9477050809567 | |
| }, | |
| "steps_from_proto": { | |
| "total": 121.72632514406814, | |
| "count": 63514, | |
| "is_parallel": true, | |
| "self": 25.82130863692896, | |
| "children": { | |
| "_process_rank_one_or_two_observation": { | |
| "total": 95.90501650713918, | |
| "count": 508112, | |
| "is_parallel": true, | |
| "self": 95.90501650713918 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| }, | |
| "trainer_advance": { | |
| "total": 617.4939186940078, | |
| "count": 63515, | |
| "self": 2.615368658000989, | |
| "children": { | |
| "process_trajectory": { | |
| "total": 110.30618302600851, | |
| "count": 63515, | |
| "self": 110.04682386800869, | |
| "children": { | |
| "RLTrainer._checkpoint": { | |
| "total": 0.2593591579998247, | |
| "count": 2, | |
| "self": 0.2593591579998247 | |
| } | |
| } | |
| }, | |
| "_update_policy": { | |
| "total": 504.5723670099983, | |
| "count": 453, | |
| "self": 326.36562890001755, | |
| "children": { | |
| "TorchPPOOptimizer.update": { | |
| "total": 178.20673810998073, | |
| "count": 22815, | |
| "self": 178.20673810998073 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| }, | |
| "trainer_threads": { | |
| "total": 8.750002962187864e-07, | |
| "count": 1, | |
| "self": 8.750002962187864e-07 | |
| }, | |
| "TrainerController._save_models": { | |
| "total": 0.10754658799987737, | |
| "count": 1, | |
| "self": 0.0011122509999950125, | |
| "children": { | |
| "RLTrainer._checkpoint": { | |
| "total": 0.10643433699988236, | |
| "count": 1, | |
| "self": 0.10643433699988236 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } |