Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
849b18b
ocean/craftax: proxy-backed full Craftax baseline + parity harness
Infatoshi Apr 18, 2026
eac5df3
ocean/craftax: native threefry PRNG + noise + floor-0 worldgen
Infatoshi Apr 18, 2026
44a516f
ocean/craftax: native world generation for all 9 floors
Infatoshi Apr 18, 2026
1c02a14
ocean/craftax: native ports of 9 simple step subsystems (no integrati…
Infatoshi Apr 19, 2026
8a3122b
ocean/craftax: native ports of projectile/spell/enchant/floor/chest
Infatoshi Apr 19, 2026
8ed0a49
ocean/craftax: native ports of do_crafting and place_block
Infatoshi Apr 19, 2026
612da13
ocean/craftax: native port of do_action
Infatoshi Apr 19, 2026
057fd61
ocean/craftax: native port of spawn_mobs
Infatoshi Apr 19, 2026
ea7bb89
ocean/craftax: native port of update_mobs
Infatoshi Apr 19, 2026
e99a214
ocean/craftax: fully native c_step, JAX proxy removed
Infatoshi Apr 19, 2026
bbe16c4
ocean/craftax: adversarial parity stress battery
Infatoshi Apr 19, 2026
e428b6e
craftax: restore production vec/train config + add convergence benchm…
Infatoshi Apr 19, 2026
049eb60
craftax: add classic env side-by-side with full for convergence bench…
Infatoshi Apr 19, 2026
9396e79
src: raise log Dict capacity from 32 to 256
Infatoshi Apr 20, 2026
c30b953
ocean/craftax: shared 16x16 texture renderer for full + classic
Infatoshi Apr 20, 2026
c7990cb
ocean/craftax: optimize spawn_mobs (bbox scan + early-out)
Infatoshi Apr 20, 2026
93cfb01
ocean/craftax: reset-pool for cached world regeneration
Infatoshi Apr 20, 2026
ef90154
ocean/craftax: update_mobs early-out on dead mob slots
Infatoshi Apr 20, 2026
20736e3
ocean/craftax: drop port-scaffolding subsystem tests
Infatoshi Apr 20, 2026
0fbf2d3
ocean/craftax: log 8 checkpoint achievements instead of all 67
Infatoshi Apr 20, 2026
754d725
Revert "src: raise log Dict capacity from 32 to 256"
Infatoshi Apr 20, 2026
1354e61
ocean/craftax_classic: optional reset-pool for cached worldgen
Infatoshi Apr 20, 2026
f6df148
craftax: reorg to top-level config + shared resources dir
Infatoshi Apr 20, 2026
0170aa5
build.sh: honor EXTRA_CFLAGS env var for per-build static-lib flags
Infatoshi Apr 20, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,13 @@ if [ -z "$NCCL_LFLAG" ]; then
NCCL_LFLAG=$(python -c "import nvidia.nccl, os; print('-L' + os.path.join(nvidia.nccl.__path__[0], 'lib'))" 2>/dev/null || echo "")
fi

WHEEL_RPATH_FLAGS=()
for lib_flag in "$CUDNN_LFLAG" "$NCCL_LFLAG"; do
if [[ "$lib_flag" == -L* ]]; then
WHEEL_RPATH_FLAGS+=("-Wl,-rpath,${lib_flag#-L}")
fi
done

export CCACHE_DIR="${CCACHE_DIR:-$HOME/.ccache}"
export CCACHE_BASEDIR="$(pwd)"
export CCACHE_COMPILERCHECK=content
Expand All @@ -232,7 +239,7 @@ if [ ! -f "$BINDING_SRC" ]; then
fi

echo "Compiling static library for $ENV..."
${CC:-clang} -c "${CLANG_OPT[@]}" \
${CC:-clang} -c "${CLANG_OPT[@]}" $EXTRA_CFLAGS \
-I. -Isrc -I$SRC_DIR -Ivendor \
-I./$RAYLIB_NAME/include -I$CUDA_HOME/include \
-DPLATFORM_DESKTOP \
Expand Down Expand Up @@ -268,6 +275,7 @@ if [ -z "$MODE" ]; then
${CXX:-g++} -shared -fPIC -fopenmp
build/bindings.o "$STATIC_LIB" "$RAYLIB_A"
-L$CUDA_HOME/lib64 $CUDNN_LFLAG $NCCL_LFLAG
"${WHEEL_RPATH_FLAGS[@]}"
-lcudart -lnccl -lnvidia-ml -lcublas -lcusolver -lcurand -lcudnn
$OMP_LIB $LINK_OPT
"${SHARED_LDFLAGS[@]}"
Expand Down
19 changes: 19 additions & 0 deletions config/craftax.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
[base]
env_name = craftax

[vec]
total_agents = 8192
num_buffers = 4
num_threads = 16

[env]
seed_offset = 0
# Pre-generated world pool. Each reset memcpys from a pool entry
# instead of re-running generate_world (~ms -> ~us per reset).
# Bounds world diversity: at most reset_pool_size unique maps are
# ever seen per process. Set to 0 to disable (required for the
# parity harness to maintain exact per-seed determinism).
reset_pool_size = 1024

[train]
total_timesteps = 200_000_000
20 changes: 20 additions & 0 deletions config/craftax_classic.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
[base]
env_name = craftax_classic

[vec]
total_agents = 8192
num_buffers = 4
num_threads = 16

[env]
# Pre-generated world pool. When > 0, c_reset memcpys from a random pool
# entry instead of re-running generate_world (~30 us -> ~0.5 us per reset).
# Default is 0 (disabled) because on classic the env is not the training
# bottleneck: policy backward/optimizer dominate, so caching doesn't move
# training SPS. Useful to set > 0 for sim-only workloads (data generation,
# evaluation rollouts) where c_step throughput matters. Bounds world
# diversity: at most reset_pool_size unique maps are ever seen per process.
reset_pool_size = 0

[train]
total_timesteps = 200_000_000
12 changes: 0 additions & 12 deletions config/ocean/craftax.ini

This file was deleted.

543 changes: 543 additions & 0 deletions ocean/craftax/PORT_NOTES.md

Large diffs are not rendered by default.

55 changes: 40 additions & 15 deletions ocean/craftax/binding.c
Original file line number Diff line number Diff line change
@@ -1,34 +1,59 @@
#define CRAFTAX_ENABLE_ENV_IMPL
#include "craftax.h"
#include "step_crafting.h"
#include "step_update_mobs.h"
#include "step_spawn_mobs.h"

// Observation/action layout macros, defined ahead of the vecenv.h include
// further down. Each macro is defined exactly once: the sizes use the
// CRAFTAX_* constants rather than hard-coded literals (previously 1345 and
// {17}), so there is no conflicting redefinition of OBS_SIZE or ACT_SIZES.
#define OBS_SIZE CRAFTAX_OBS_SIZE
#define NUM_ATNS 1
#define ACT_SIZES {CRAFTAX_NUM_ACTIONS}
#define OBS_TENSOR_T FloatTensor

#define Env Craftax
#include "vecenv.h"

void my_init(Env* env, Dict* kwargs) {
    // Each env instance drives exactly one agent.
    env->num_agents = 1;

    // Optional "seed_offset" kwarg (0 when absent) is added to env->rng to
    // form the env seed.
    DictItem* seed_entry = dict_get_unsafe(kwargs, "seed_offset");
    uint64_t offset = (seed_entry == NULL) ? 0 : (uint64_t)seed_entry->value;
    env->seed = offset + (uint64_t)env->rng;

    // Optional "reset_pool_size" kwarg (0 when absent). The reset pool is
    // process-wide: the first caller wins and the rest block until it is
    // ready. A size of 0 disables caching, so every reset regenerates the
    // world (exact parity mode).
    DictItem* pool_entry = dict_get_unsafe(kwargs, "reset_pool_size");
    int pool_size = (pool_entry == NULL) ? 0 : (int)pool_entry->value;
    craftax_set_reset_pool_size(pool_size);

    c_init(env);
}

// Copies the aggregate episode statistics and a small set of checkpoint
// achievements from `log` into the output Dict `out`.
void my_log(Log* log, Dict* out) {
    dict_set(out, "perf", log->perf);
    dict_set(out, "score", log->score);
    dict_set(out, "episode_return", log->episode_return);
    dict_set(out, "episode_length", log->episode_length);

    // Log 8 checkpoint achievements that form the tech / exploration curve.
    // perf (above) already aggregates all 67 into a normalized score; the
    // individual lines here are the milestones worth watching on a dashboard.
    // The env still tracks all 67 internally for reward and perf; we just
    // don't send every one through the log Dict.
    // `idx` is the position of the achievement inside log->achievements.
    static const struct { const char* name; int idx; } checkpoints[] = {
        {"collect_wood", 0},
        {"make_wood_pickaxe", 5},
        {"make_stone_pickaxe", 13},
        {"collect_iron", 18},
        {"make_iron_pickaxe", 20},
        {"collect_diamond", 19},
        {"enter_gnomish_mines", 28},
        {"defeat_necromancer", 48},
    };
    const int num_checkpoints = (int)(sizeof(checkpoints) / sizeof(checkpoints[0]));
    for (int i = 0; i < num_checkpoints; i++) {
        dict_set(out, checkpoints[i].name, log->achievements[checkpoints[i].idx]);
    }
}
76 changes: 76 additions & 0 deletions ocean/craftax/craftax.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
// Standalone viewer for Craftax (random-action policy).
//
// Build:
// ./build.sh craftax --fast # optimized
// ./build.sh craftax --local # debug with sanitizers
// Run:
// ./craftax

#define CRAFTAX_ENABLE_ENV_IMPL
#include "craftax.h"
#include "step_crafting.h"
#include "step_update_mobs.h"
#include "step_spawn_mobs.h"

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

// One step of a 32-bit xorshift generator (shift triple 13 / 17 / 5).
// Reads the state from *s, stores the advanced state back and returns it.
// When the step produces 0 (i.e. the incoming state was 0), the stored state
// is replaced with 0xdeadbeef so later calls do not stay stuck at zero; the
// value returned by that particular call is still 0.
static uint32_t xorshift32(uint32_t* s) {
    uint32_t v = *s;
    v ^= v << 13;
    v ^= v >> 17;
    v ^= v << 5;
    if (v == 0) {
        *s = 0xdeadbeef;
    } else {
        *s = v;
    }
    return v;
}

// Entry point for the standalone viewer.
//
// Usage: ./craftax [seed]
//   seed  optional base-10 integer; defaults to the current time.
//
// Runs a single-agent env in a render loop until the window is closed.
// By default every frame steps the env with a pseudo-random action. Pressing
// H toggles human control, where:
//   A/Left, D/Right, W/Up, S/Down  move
//   Space                          DO
//   Z                              SLEEP
//   Period                         step once with NOOP
// In human mode the env only steps on frames where one of these keys fires.
//
// Returns 0 on normal exit, EXIT_FAILURE if the agent buffers cannot be
// allocated.
int main(int argc, char** argv) {
    uint64_t seed = (argc > 1) ? strtoull(argv[1], NULL, 10) : (uint64_t)time(NULL);

    Craftax env;
    memset(&env, 0, sizeof(env));
    env.num_agents = 1;
    env.seed = seed;
    env.rng = (uint32_t)seed;

    // Minimal buffers for a single agent
    env.observations = calloc(CRAFTAX_OBS_SIZE, sizeof(float));
    env.actions = calloc(1, sizeof(float));
    env.rewards = calloc(1, sizeof(float));
    env.terminals = calloc(1, sizeof(float));
    if (!env.observations || !env.actions || !env.rewards || !env.terminals) {
        // Bail out before the env dereferences a NULL buffer. free(NULL) is a
        // no-op, so releasing all four unconditionally is safe.
        fprintf(stderr, "craftax: failed to allocate agent buffers\n");
        free(env.observations);
        free(env.actions);
        free(env.rewards);
        free(env.terminals);
        return EXIT_FAILURE;
    }

    c_init(&env);
    c_reset(&env);

    // Separate stream for random actions, decorrelated from the env seed.
    uint32_t action_rng = (uint32_t)(seed ^ 0x9E3779B9u);
    bool human_control = false;
    int human_action = CRAFTAX_ACTION_NOOP;

    // NOTE(review): this loop presumes a window already exists once
    // c_init/c_reset return. raylib's WindowShouldClose() reports true while
    // no window is initialized, which would end the loop before the first
    // c_render -- confirm where the window is created.
    while (!WindowShouldClose()) {
        // Toggle human control
        if (IsKeyPressed(KEY_H)) human_control = !human_control;

        if (human_control) {
            // Later checks override earlier ones when several keys fire on
            // the same frame.
            human_action = CRAFTAX_ACTION_NOOP;
            if (IsKeyPressed(KEY_A) || IsKeyPressed(KEY_LEFT)) human_action = CRAFTAX_ACTION_LEFT;
            if (IsKeyPressed(KEY_D) || IsKeyPressed(KEY_RIGHT)) human_action = CRAFTAX_ACTION_RIGHT;
            if (IsKeyPressed(KEY_W) || IsKeyPressed(KEY_UP)) human_action = CRAFTAX_ACTION_UP;
            if (IsKeyPressed(KEY_S) || IsKeyPressed(KEY_DOWN)) human_action = CRAFTAX_ACTION_DOWN;
            if (IsKeyPressed(KEY_SPACE)) human_action = CRAFTAX_ACTION_DO;
            if (IsKeyPressed(KEY_Z)) human_action = CRAFTAX_ACTION_SLEEP;
            env.actions[0] = (float)human_action;
            // Period advances one step with NOOP; otherwise the env idles.
            if (human_action != CRAFTAX_ACTION_NOOP || IsKeyPressed(KEY_PERIOD)) c_step(&env);
        } else {
            env.actions[0] = (float)(xorshift32(&action_rng) % CRAFTAX_NUM_ACTIONS);
            c_step(&env);
        }

        c_render(&env);
    }

    c_close(&env);
    free(env.observations);
    free(env.actions);
    free(env.rewards);
    free(env.terminals);
    return 0;
}
Loading