winkin119 committed on
Commit
61afe31
·
verified ·
1 Parent(s): 14e4ab9

upload via upload_folder 2025-08-03T14:14:32.982945+00:00

Browse files
README.md ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ env_name: LunarLander-v3
3
+ tags:
4
+ - LunarLander-v3
5
+ - double-dqn
6
+ - reinforcement-learning
7
+ - custom-implementation
8
+ - deep-q-learning
9
+ - pytorch
10
+ model-index:
11
+ - name: DoubleDQN-1d-LunarLander-v3
12
+ results:
13
+ - task:
14
+ type: reinforcement-learning
15
+ name: reinforcement-learning
16
+ dataset:
17
+ name: LunarLander-v3
18
+ type: LunarLander-v3
19
+ metrics:
20
+ - type: mean_reward
21
+ value: 271.13 +/- 32.77
22
+ name: mean_reward
23
+ verified: false
24
+ ---
25
+
26
+ # **Double-DQN** Agent playing **LunarLander-v3**
27
+ This is a trained model of a **Double-DQN** agent playing **LunarLander-v3**.
28
+
29
+ ## Usage
30
+ ### Create the conda environment (from a clone of https://github.com/GeneHit/drl_practice)
31
+ ```bash
32
+ conda create -n drl python=3.10
33
+ conda activate drl
34
+ python -m pip install -r requirements.txt
35
+ ```
36
+
37
+ ### Play with the full model
38
+ ```python
39
+ # load the full model
40
+ model = load_from_hub(repo_id="winkin119/DoubleDQN-1d-LunarLander-v3", filename="full_model.pt")
41
+
42
+ # Create the environment.
43
+ env = gym.make("LunarLander-v3")
44
+ state, _ = env.reset()
45
+ action = model.action(state)
46
+ ...
47
+ ```
48
+ A state-dict version of the model (`state_dict.pt`) is also provided; see the corresponding chapter in the repo for how to load it.
eval_result.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "mean_reward": 271.1292218414864,
3
+ "std_reward": 32.76824691220499,
4
+ "datetime": "2025-07-30T12:11:33.037116+00:00",
5
+ "train_duration_min": "9.34"
6
+ }
full_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:684be2bf9f5991db171c296b32d3b4829576a49302c95a9445c81764e51dd93c
3
+ size 281145
params.json ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "env_config": {
3
+ "env_id": "LunarLander-v3",
4
+ "env_kwargs": {},
5
+ "max_steps": null,
6
+ "normalize_obs": false,
7
+ "use_image": false,
8
+ "vector_env_num": 6,
9
+ "use_multi_processing": true,
10
+ "image_shape": null,
11
+ "frame_stack": 1,
12
+ "frame_skip": 1,
13
+ "training_render_mode": null
14
+ },
15
+ "device": "cpu",
16
+ "learning_rate": 0.0001,
17
+ "gamma": 0.99,
18
+ "checkpoint_pathname": "",
19
+ "max_grad_norm": null,
20
+ "log_interval": 50,
21
+ "track": true,
22
+ "eval_episodes": 100,
23
+ "eval_random_seed": 42,
24
+ "eval_video_num": 10,
25
+ "timesteps": 250000,
26
+ "epsilon_schedule": {
27
+ "_type": "LinearSchedule",
28
+ "_module": "practice.utils_for_coding.scheduler_utils",
29
+ "_start_e": 1.0,
30
+ "_end_e": 0.01,
31
+ "_duration": 150000,
32
+ "_start_t": 0
33
+ },
34
+ "replay_buffer_capacity": 120000,
35
+ "batch_size": 64,
36
+ "train_interval": 1,
37
+ "target_update_interval": 250,
38
+ "update_start_step": 2000,
39
+ "dqn_algorithm": "double"
40
+ }
replay.mp4 ADDED
Binary file (41.6 kB). View file
 
state_dict.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc5358fa47f1ccd737da9dcb0d6a2283b154719bdada1dbd40841beb056b0c64
3
+ size 279673
tensorboard/events.out.tfevents.1753876921.winkindeMacBook-Air.local.76622.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad003c56ec4b32c116d08f59d06260e9f819a52e4baac64ec44d440a575d45ac
3
+ size 1720886