diff --git a/invokeai/app/invocations/qwen_image_denoise.py b/invokeai/app/invocations/qwen_image_denoise.py
index cd3ff917596..4c62c0ebec6 100644
--- a/invokeai/app/invocations/qwen_image_denoise.py
+++ b/invokeai/app/invocations/qwen_image_denoise.py
@@ -36,14 +36,14 @@
 
 @invocation(
     "qwen_image_denoise",
-    title="Denoise - Qwen Image Edit",
+    title="Denoise - Qwen Image",
     tags=["image", "qwen_image"],
     category="image",
     version="1.0.0",
     classification=Classification.Prototype,
 )
 class QwenImageDenoiseInvocation(BaseInvocation, WithMetadata, WithBoard):
-    """Run the denoising process with a Qwen Image Edit model."""
+    """Run the denoising process with a Qwen Image model."""
 
     # If latents is provided, this means we are doing image-to-image.
     latents: Optional[LatentsField] = InputField(
@@ -132,7 +132,7 @@ def _get_noise(
         seed: int,
     ) -> torch.Tensor:
         rand_device = "cpu"
-        rand_dtype = torch.float16
+        rand_dtype = torch.float32
 
         return torch.randn(
             batch_size,
@@ -270,7 +270,7 @@ def _run_diffusion(self, context: InvocationContext):
 
         # Try to load the scheduler config from the model's directory (Diffusers models
         # have a scheduler/ subdir). For GGUF models this path doesn't exist, so fall
-        # back to instantiating the scheduler with the known Qwen Image Edit defaults.
+        # back to instantiating the scheduler with the known Qwen Image defaults.
         model_path = context.models.get_absolute_path(context.models.get_config(self.transformer.transformer))
         scheduler_path = model_path / "scheduler"
         if scheduler_path.is_dir() and (scheduler_path / "scheduler_config.json").exists():
@@ -304,8 +304,19 @@ def _run_diffusion(self, context: InvocationContext):
         init_sigmas = np.linspace(1.0, 1.0 / self.steps, self.steps).tolist()
         scheduler.set_timesteps(sigmas=init_sigmas, mu=mu, device=device)
 
-        timesteps_sched = scheduler.timesteps
-        sigmas_sched = scheduler.sigmas
+        # Clip the schedule based on denoising_start/denoising_end to support img2img strength.
+        # The scheduler's sigmas go from high (noisy) to 0 (clean). We clip to the fractional range.
+        sigmas_sched = scheduler.sigmas  # (N+1,) including terminal 0
+        if self.denoising_start > 0 or self.denoising_end < 1:
+            total_sigmas = len(sigmas_sched) - 1  # exclude terminal
+            start_idx = int(round(self.denoising_start * total_sigmas))
+            end_idx = int(round(self.denoising_end * total_sigmas))
+            sigmas_sched = sigmas_sched[start_idx : end_idx + 1]  # +1 to include the next sigma for dt
+            # Rebuild timesteps from clipped sigmas (drop the last sigma; it is only the final step's target)
+            timesteps_sched = sigmas_sched[:-1] * scheduler.config.num_train_timesteps
+        else:
+            timesteps_sched = scheduler.timesteps
+
         total_steps = len(timesteps_sched)
 
         cfg_scale = self._prepare_cfg_scale(total_steps)
@@ -353,29 +364,44 @@ def _run_diffusion(self, context: InvocationContext):
         # Pack latents into 2x2 patches: (B, C, H, W) -> (B, H/2*W/2, C*4)
         latents = self._pack_latents(latents, 1, out_channels, latent_height, latent_width)
 
-        # Pack reference image latents and concatenate along the sequence dimension.
-        # The edit transformer always expects [noisy_patches ; ref_patches] in its sequence.
-        if ref_latents is not None:
-            _, ref_ch, rh, rw = ref_latents.shape
-            if rh != latent_height or rw != latent_width:
-                ref_latents = torch.nn.functional.interpolate(
-                    ref_latents, size=(latent_height, latent_width), mode="bilinear"
+        # Determine whether the model uses reference latent conditioning (zero_cond_t).
+        # Edit models (zero_cond_t=True) expect [noisy_patches ; ref_patches] in the sequence.
+        # Txt2img models (zero_cond_t=False) only take noisy patches.
+        has_zero_cond_t = getattr(transformer_info.model, "zero_cond_t", False) or getattr(
+            transformer_info.model.config, "zero_cond_t", False
+        )
+        use_ref_latents = has_zero_cond_t
+
+        ref_latents_packed = None
+        if use_ref_latents:
+            if ref_latents is not None:
+                _, ref_ch, rh, rw = ref_latents.shape
+                if rh != latent_height or rw != latent_width:
+                    ref_latents = torch.nn.functional.interpolate(
+                        ref_latents, size=(latent_height, latent_width), mode="bilinear"
+                    )
+            else:
+                # No reference image provided — use zeros so the model still gets the
+                # expected sequence layout.
+                ref_latents = torch.zeros(
+                    1, out_channels, latent_height, latent_width, device=device, dtype=inference_dtype
                 )
+            ref_latents_packed = self._pack_latents(ref_latents, 1, out_channels, latent_height, latent_width)
+
+        # img_shapes tells the transformer the spatial layout of patches.
+        if use_ref_latents:
+            img_shapes = [
+                [
+                    (1, latent_height // 2, latent_width // 2),
+                    (1, latent_height // 2, latent_width // 2),
+                ]
+            ]
         else:
-            # No reference image provided — use zeros so the model still gets the
-            # expected sequence layout.
-            ref_latents = torch.zeros(
-                1, out_channels, latent_height, latent_width, device=device, dtype=inference_dtype
-            )
-        ref_latents_packed = self._pack_latents(ref_latents, 1, out_channels, latent_height, latent_width)
-
-        # img_shapes tells the transformer the spatial layout of noisy and reference patches.
-        img_shapes = [
-            [
-                (1, latent_height // 2, latent_width // 2),
-                (1, latent_height // 2, latent_width // 2),
+            img_shapes = [
+                [
+                    (1, latent_height // 2, latent_width // 2),
+                ]
             ]
-        ]
 
         # Prepare inpaint extension (operates in 4D space, so unpack/repack around it)
         inpaint_mask = self._prep_inpaint_mask(context, noise)  # noise has the right 4D shape
@@ -422,14 +448,16 @@ def _run_diffusion(self, context: InvocationContext):
                 )
             )
 
-            scheduler.set_begin_index(0)
-
             for step_idx, t in enumerate(tqdm(timesteps_sched)):
                 # The pipeline passes timestep / 1000 to the transformer
                 timestep = t.expand(latents.shape[0]).to(inference_dtype)
 
-                # Concatenate noisy and reference patches along the sequence dim
-                model_input = torch.cat([latents, ref_latents_packed], dim=1)
+                # For edit models: concatenate noisy and reference patches along the sequence dim
+                # For txt2img models: just use noisy patches
+                if ref_latents_packed is not None:
+                    model_input = torch.cat([latents, ref_latents_packed], dim=1)
+                else:
+                    model_input = latents
 
                 noise_pred_cond = transformer(
                     hidden_states=model_input,
@@ -457,8 +485,12 @@ def _run_diffusion(self, context: InvocationContext):
                 else:
                     noise_pred = noise_pred_cond
 
-                # Use the scheduler's step method — exactly matching the pipeline
-                latents = scheduler.step(noise_pred, t, latents, return_dict=False)[0]
+                # Euler step using the (possibly clipped) sigma schedule
+                sigma_curr = sigmas_sched[step_idx]
+                sigma_next = sigmas_sched[step_idx + 1]
+                dt = sigma_next - sigma_curr
+                latents = latents.to(torch.float32) + dt * noise_pred.to(torch.float32)
+                latents = latents.to(inference_dtype)
 
                 if inpaint_extension is not None:
                     sigma_next = sigmas_sched[step_idx + 1].item()
diff --git a/invokeai/app/invocations/qwen_image_image_to_latents.py b/invokeai/app/invocations/qwen_image_image_to_latents.py
index 19d233a7073..c5fe1b5d5c8 100644
--- a/invokeai/app/invocations/qwen_image_image_to_latents.py
+++ b/invokeai/app/invocations/qwen_image_image_to_latents.py
@@ -22,14 +22,14 @@
 
 @invocation(
     "qwen_image_i2l",
-    title="Image to Latents - Qwen Image Edit",
+    title="Image to Latents - Qwen Image",
     tags=["image", "latents", "vae", "i2l", "qwen_image"],
     category="image",
     version="1.0.0",
     classification=Classification.Prototype,
 )
 class QwenImageImageToLatentsInvocation(BaseInvocation, WithMetadata, WithBoard):
-    """Generates latents from an image using the Qwen Image Edit VAE."""
+    """Generates latents from an image using the Qwen Image VAE."""
 
     image: ImageField = InputField(description="The image to encode.")
     vae: VAEField = InputField(description=FieldDescriptions.vae, input=Input.Connection)
@@ -51,7 +51,7 @@ def vae_encode(vae_info: LoadedModel, image_tensor: torch.Tensor) -> torch.Tenso
 
             image_tensor = image_tensor.to(device=TorchDevice.choose_torch_device(), dtype=vae.dtype)
             with torch.inference_mode():
-                # The Qwen Image Edit VAE expects 5D input: (B, C, num_frames, H, W)
+                # The Qwen Image VAE expects 5D input: (B, C, num_frames, H, W)
                 if image_tensor.dim() == 4:
                     image_tensor = image_tensor.unsqueeze(2)
 
diff --git a/invokeai/app/invocations/qwen_image_latents_to_image.py b/invokeai/app/invocations/qwen_image_latents_to_image.py
index f1bce204806..b3ea39c4bbf 100644
--- a/invokeai/app/invocations/qwen_image_latents_to_image.py
+++ b/invokeai/app/invocations/qwen_image_latents_to_image.py
@@ -23,14 +23,14 @@
 
 @invocation(
     "qwen_image_l2i",
-    title="Latents to Image - Qwen Image Edit",
+    title="Latents to Image - Qwen Image",
     tags=["latents", "image", "vae", "l2i", "qwen_image"],
     category="latents",
     version="1.0.0",
     classification=Classification.Prototype,
 )
 class QwenImageLatentsToImageInvocation(BaseInvocation, WithMetadata, WithBoard):
-    """Generates an image from latents using the Qwen Image Edit VAE."""
+    """Generates an image from latents using the Qwen Image VAE."""
 
     latents: LatentsField = InputField(description=FieldDescriptions.latents, input=Input.Connection)
     vae: VAEField = InputField(description=FieldDescriptions.vae, input=Input.Connection)
@@ -56,7 +56,7 @@ def invoke(self, context: InvocationContext) -> ImageOutput:
             TorchDevice.empty_cache()
 
             with torch.inference_mode(), tiling_context:
-                # The Qwen Image Edit VAE uses per-channel latents_mean / latents_std
+                # The Qwen Image VAE uses per-channel latents_mean / latents_std
                 # instead of a single scaling_factor.
                 # Latents are 5D: (B, C, num_frames, H, W) — the unpack from the
                 # denoise step already produces this shape.
diff --git a/invokeai/app/invocations/qwen_image_lora_loader.py b/invokeai/app/invocations/qwen_image_lora_loader.py
index fb056166153..f670b2d8954 100644
--- a/invokeai/app/invocations/qwen_image_lora_loader.py
+++ b/invokeai/app/invocations/qwen_image_lora_loader.py
@@ -15,7 +15,7 @@
 
 @invocation_output("qwen_image_lora_loader_output")
 class QwenImageLoRALoaderOutput(BaseInvocationOutput):
-    """Qwen Image Edit LoRA Loader Output"""
+    """Qwen Image LoRA Loader Output"""
 
     transformer: Optional[TransformerField] = OutputField(
         default=None, description=FieldDescriptions.transformer, title="Transformer"
@@ -24,14 +24,14 @@ class QwenImageLoRALoaderOutput(BaseInvocationOutput):
 
 @invocation(
     "qwen_image_lora_loader",
-    title="Apply LoRA - Qwen Image Edit",
+    title="Apply LoRA - Qwen Image",
     tags=["lora", "model", "qwen_image"],
     category="model",
     version="1.0.0",
     classification=Classification.Prototype,
 )
 class QwenImageLoRALoaderInvocation(BaseInvocation):
-    """Apply a LoRA model to a Qwen Image Edit transformer."""
+    """Apply a LoRA model to a Qwen Image transformer."""
 
     lora: ModelIdentifierField = InputField(
         description=FieldDescriptions.lora_model,
@@ -72,14 +72,14 @@ def invoke(self, context: InvocationContext) -> QwenImageLoRALoaderOutput:
 
 @invocation(
     "qwen_image_lora_collection_loader",
-    title="Apply LoRA Collection - Qwen Image Edit",
+    title="Apply LoRA Collection - Qwen Image",
     tags=["lora", "model", "qwen_image"],
     category="model",
     version="1.0.0",
     classification=Classification.Prototype,
 )
 class QwenImageLoRACollectionLoader(BaseInvocation):
-    """Applies a collection of LoRAs to a Qwen Image Edit transformer."""
+    """Applies a collection of LoRAs to a Qwen Image transformer."""
 
     loras: Optional[LoRAField | list[LoRAField]] = InputField(
         default=None, description="LoRA models and weights. May be a single LoRA or collection.", title="LoRAs"
diff --git a/invokeai/app/invocations/qwen_image_model_loader.py b/invokeai/app/invocations/qwen_image_model_loader.py
index e2d21820b05..fd96067f561 100644
--- a/invokeai/app/invocations/qwen_image_model_loader.py
+++ b/invokeai/app/invocations/qwen_image_model_loader.py
@@ -20,7 +20,7 @@
 
 @invocation_output("qwen_image_model_loader_output")
 class QwenImageModelLoaderOutput(BaseInvocationOutput):
-    """Qwen Image Edit base model loader output."""
+    """Qwen Image model loader output."""
 
     transformer: TransformerField = OutputField(description=FieldDescriptions.transformer, title="Transformer")
     qwen_vl_encoder: QwenVLEncoderField = OutputField(
@@ -31,14 +31,14 @@ class QwenImageModelLoaderOutput(BaseInvocationOutput):
 
 @invocation(
     "qwen_image_model_loader",
-    title="Main Model - Qwen Image Edit",
+    title="Main Model - Qwen Image",
     tags=["model", "qwen_image"],
     category="model",
     version="1.1.0",
     classification=Classification.Prototype,
 )
 class QwenImageModelLoaderInvocation(BaseInvocation):
-    """Loads a Qwen Image Edit model, outputting its submodels.
+    """Loads a Qwen Image model, outputting its submodels.
 
     The transformer is always loaded from the main model (Diffusers or GGUF).
 
@@ -59,7 +59,7 @@ class QwenImageModelLoaderInvocation(BaseInvocation):
 
     component_source: Optional[ModelIdentifierField] = InputField(
         default=None,
-        description="Diffusers Qwen Image Edit model to extract the VAE and Qwen VL encoder from. "
+        description="Diffusers Qwen Image model to extract the VAE and Qwen VL encoder from. "
         "Required when using a GGUF quantized transformer. "
         "Ignored when the main model is already in Diffusers format.",
         input=Input.Direct,
@@ -96,7 +96,7 @@ def invoke(self, context: InvocationContext) -> QwenImageModelLoaderOutput:
             raise ValueError(
                 "No source for VAE and Qwen VL encoder. "
                 "GGUF quantized models only contain the transformer — "
-                "please set 'Component Source' to a Diffusers Qwen Image Edit model "
+                "please set 'Component Source' to a Diffusers Qwen Image model "
                 "to provide the VAE and text encoder."
             )
 
diff --git a/invokeai/app/invocations/qwen_image_text_encoder.py b/invokeai/app/invocations/qwen_image_text_encoder.py
index 641e8c4d388..a067421452e 100644
--- a/invokeai/app/invocations/qwen_image_text_encoder.py
+++ b/invokeai/app/invocations/qwen_image_text_encoder.py
@@ -20,39 +20,57 @@
     QwenImageConditioningInfo,
 )
 
-# The Qwen Image Edit pipeline uses a specific system prompt and drops the first
-# N tokens (the system prompt prefix) from the embeddings.  These constants are
-# taken directly from the diffusers QwenImagePipeline.
-_SYSTEM_PROMPT = (
+# Prompt templates and drop indices for the two Qwen Image model modes.
+# These are taken directly from the diffusers pipelines.
+
+# Image editing mode (QwenImageEditPipeline)
+_EDIT_SYSTEM_PROMPT = (
     "Describe the key features of the input image (color, shape, size, texture, objects, background), "
     "then explain how the user's text instruction should alter or modify the image. "
     "Generate a new image that meets the user's requirements while maintaining consistency "
     "with the original input where appropriate."
 )
+_EDIT_DROP_IDX = 64
+
+# Text-to-image mode (QwenImagePipeline)
+_GENERATE_SYSTEM_PROMPT = (
+    "Describe the image by detailing the color, shape, size, texture, quantity, "
+    "text, spatial relationships of the objects and background:"
+)
+_GENERATE_DROP_IDX = 34
+
 _IMAGE_PLACEHOLDER = "<|vision_start|><|image_pad|><|vision_end|>"
-_DROP_IDX = 64
 
 
 def _build_prompt(user_prompt: str, num_images: int) -> str:
-    """Build the full prompt with one vision placeholder per reference image."""
-    image_tokens = _IMAGE_PLACEHOLDER * max(num_images, 1)
-    return (
-        f"<|im_start|>system\n{_SYSTEM_PROMPT}<|im_end|>\n"
-        f"<|im_start|>user\n{image_tokens}{user_prompt}<|im_end|>\n"
-        "<|im_start|>assistant\n"
-    )
+    """Build the full prompt with the appropriate template based on whether reference images are provided."""
+    if num_images > 0:
+        # Edit mode: include vision placeholders for reference images
+        image_tokens = _IMAGE_PLACEHOLDER * num_images
+        return (
+            f"<|im_start|>system\n{_EDIT_SYSTEM_PROMPT}<|im_end|>\n"
+            f"<|im_start|>user\n{image_tokens}{user_prompt}<|im_end|>\n"
+            "<|im_start|>assistant\n"
+        )
+    else:
+        # Generate mode: text-only prompt
+        return (
+            f"<|im_start|>system\n{_GENERATE_SYSTEM_PROMPT}<|im_end|>\n"
+            f"<|im_start|>user\n{user_prompt}<|im_end|>\n"
+            "<|im_start|>assistant\n"
+        )
 
 
 @invocation(
     "qwen_image_text_encoder",
-    title="Prompt - Qwen Image Edit",
+    title="Prompt - Qwen Image",
     tags=["prompt", "conditioning", "qwen_image"],
     category="conditioning",
     version="1.2.0",
     classification=Classification.Prototype,
 )
 class QwenImageTextEncoderInvocation(BaseInvocation):
-    """Encodes text and reference images for Qwen Image Edit using Qwen2.5-VL."""
+    """Encodes text and reference images for Qwen Image using Qwen2.5-VL."""
 
     prompt: str = InputField(description="Text prompt describing the desired edit.", ui_component=UIComponent.Textarea)
     reference_images: list[ImageField] = InputField(
@@ -188,7 +206,10 @@ def _encode(
             hidden_states = outputs.hidden_states[-1]
 
             # Extract valid (non-padding) tokens using the attention mask,
-            # then drop the first _DROP_IDX tokens (system prompt prefix).
+            # then drop the system prompt prefix tokens.
+            # The drop index differs between edit mode (64) and generate mode (34).
+            drop_idx = _EDIT_DROP_IDX if images else _GENERATE_DROP_IDX
+
             attn_mask = model_inputs.attention_mask
             bool_mask = attn_mask.bool()
             valid_lengths = bool_mask.sum(dim=1)
@@ -196,7 +217,7 @@ def _encode(
             split_hidden = torch.split(selected, valid_lengths.tolist(), dim=0)
 
             # Drop system prefix tokens and build padded output
-            trimmed = [h[_DROP_IDX:] for h in split_hidden]
+            trimmed = [h[drop_idx:] for h in split_hidden]
             attn_mask_list = [torch.ones(h.size(0), dtype=torch.long, device=device) for h in trimmed]
             max_seq_len = max(h.size(0) for h in trimmed)
 
diff --git a/invokeai/app/services/model_records/model_records_base.py b/invokeai/app/services/model_records/model_records_base.py
index ea5b9ef7546..dcdc0ce5956 100644
--- a/invokeai/app/services/model_records/model_records_base.py
+++ b/invokeai/app/services/model_records/model_records_base.py
@@ -25,8 +25,8 @@
     ModelSourceType,
     ModelType,
     ModelVariantType,
-    QwenImageVariantType,
     Qwen3VariantType,
+    QwenImageVariantType,
     SchedulerPredictionType,
     ZImageVariantType,
 )
@@ -95,7 +95,13 @@ class ModelRecordChanges(BaseModelExcludeNull):
     # Checkpoint-specific changes
     # TODO(MM2): Should we expose these? Feels footgun-y...
     variant: Optional[
-        ModelVariantType | ClipVariantType | FluxVariantType | Flux2VariantType | ZImageVariantType | QwenImageVariantType | Qwen3VariantType
+        ModelVariantType
+        | ClipVariantType
+        | FluxVariantType
+        | Flux2VariantType
+        | ZImageVariantType
+        | QwenImageVariantType
+        | Qwen3VariantType
     ] = Field(description="The variant of the model.", default=None)
     prediction_type: Optional[SchedulerPredictionType] = Field(
         description="The prediction type of the model.", default=None
diff --git a/invokeai/backend/model_manager/configs/lora.py b/invokeai/backend/model_manager/configs/lora.py
index a5b9f40631d..05698a3c33a 100644
--- a/invokeai/backend/model_manager/configs/lora.py
+++ b/invokeai/backend/model_manager/configs/lora.py
@@ -769,29 +769,62 @@ def _validate_looks_like_lora(cls, mod: ModelOnDisk) -> None:
 
         has_qwen_ie_keys = state_dict_has_any_keys_starting_with(
             state_dict,
-            {"transformer_blocks.", "transformer.transformer_blocks."},
+            {
+                "transformer_blocks.",
+                "transformer.transformer_blocks.",
+                "lora_unet_transformer_blocks_",  # Kohya format
+            },
         )
         has_lora_suffix = state_dict_has_any_keys_ending_with(
             state_dict,
-            {"lora_A.weight", "lora_B.weight", "lora_down.weight", "lora_up.weight", "dora_scale"},
+            {
+                "lora_A.weight", "lora_B.weight", "lora_down.weight", "lora_up.weight",
+                "dora_scale", "lokr_w1", "lokr_w2",  # LoKR format
+            },
         )
-        # Must NOT have diffusion_model.layers (Z-Image) or double_blocks/single_blocks (Flux)
+        # Must NOT have diffusion_model.layers (Z-Image) or Flux-style keys.
+        # Flux LoRAs can have transformer.single_transformer_blocks or transformer.transformer_blocks
+        # (with the "transformer." prefix and "single_" variant) which would falsely match our check.
+        # Flux Kohya LoRAs use lora_unet_double_blocks or lora_unet_single_blocks.
         has_z_image_keys = state_dict_has_any_keys_starting_with(state_dict, {"diffusion_model.layers."})
-        has_flux_keys = state_dict_has_any_keys_starting_with(state_dict, {"double_blocks.", "single_blocks."})
+        has_flux_keys = state_dict_has_any_keys_starting_with(
+            state_dict,
+            {
+                "double_blocks.",
+                "single_blocks.",
+                "single_transformer_blocks.",
+                "transformer.single_transformer_blocks.",
+                "lora_unet_double_blocks_",
+                "lora_unet_single_blocks_",
+                "lora_unet_single_transformer_blocks_",
+            },
+        )
 
         if has_qwen_ie_keys and has_lora_suffix and not has_z_image_keys and not has_flux_keys:
             return
 
-        raise NotAMatchError("model does not match Qwen Image Edit LoRA heuristics")
+        raise NotAMatchError("model does not match Qwen Image LoRA heuristics")
 
     @classmethod
     def _get_base_or_raise(cls, mod: ModelOnDisk) -> BaseModelType:
         state_dict = mod.load_state_dict()
         has_qwen_ie_keys = state_dict_has_any_keys_starting_with(
-            state_dict, {"transformer_blocks.", "transformer.transformer_blocks."}
+            state_dict,
+            {"transformer_blocks.", "transformer.transformer_blocks.", "lora_unet_transformer_blocks_"},
         )
         has_z_image_keys = state_dict_has_any_keys_starting_with(state_dict, {"diffusion_model.layers."})
-        has_flux_keys = state_dict_has_any_keys_starting_with(state_dict, {"double_blocks.", "single_blocks."})
+        has_flux_keys = state_dict_has_any_keys_starting_with(
+            state_dict,
+            {
+                "double_blocks.",
+                "single_blocks.",
+                "single_transformer_blocks.",
+                "transformer.single_transformer_blocks.",
+                "lora_unet_double_blocks_",
+                "lora_unet_single_blocks_",
+                "lora_unet_single_transformer_blocks_",
+            },
+        )
 
         if has_qwen_ie_keys and not has_z_image_keys and not has_flux_keys:
             return BaseModelType.QwenImage
diff --git a/invokeai/backend/model_manager/configs/main.py b/invokeai/backend/model_manager/configs/main.py
index 484a95f4bb8..6ec0611fdf3 100644
--- a/invokeai/backend/model_manager/configs/main.py
+++ b/invokeai/backend/model_manager/configs/main.py
@@ -1208,7 +1208,7 @@ class Main_Diffusers_QwenImage_Config(Diffusers_Config_Base, Main_Config_Base, C
     """Model config for Qwen Image diffusers models (both txt2img and edit)."""
 
     base: Literal[BaseModelType.QwenImage] = Field(BaseModelType.QwenImage)
-    variant: QwenImageVariantType = Field(default=QwenImageVariantType.Generate)
+    variant: QwenImageVariantType | None = Field(default=None)
 
     @classmethod
     def from_model_on_disk(cls, mod: ModelOnDisk, override_fields: dict[str, Any]) -> Self:
@@ -1269,7 +1269,7 @@ class Main_GGUF_QwenImage_Config(Checkpoint_Config_Base, Main_Config_Base, Confi
 
     base: Literal[BaseModelType.QwenImage] = Field(default=BaseModelType.QwenImage)
     format: Literal[ModelFormat.GGUFQuantized] = Field(default=ModelFormat.GGUFQuantized)
-    variant: QwenImageVariantType = Field(default=QwenImageVariantType.Generate)
+    variant: QwenImageVariantType | None = Field(default=None)
 
     @classmethod
     def from_model_on_disk(cls, mod: ModelOnDisk, override_fields: dict[str, Any]) -> Self:
diff --git a/invokeai/backend/model_manager/load/model_loaders/qwen_image.py b/invokeai/backend/model_manager/load/model_loaders/qwen_image.py
index 15fcedba166..a025e727945 100644
--- a/invokeai/backend/model_manager/load/model_loaders/qwen_image.py
+++ b/invokeai/backend/model_manager/load/model_loaders/qwen_image.py
@@ -15,6 +15,7 @@
     BaseModelType,
     ModelFormat,
     ModelType,
+    QwenImageVariantType,
     SubModelType,
 )
 from invokeai.backend.quantization.gguf.ggml_tensor import GGMLTensor
@@ -160,10 +161,13 @@ def _load_from_singlefile(self, config: AnyModelConfig) -> AnyModel:
             "axes_dims_rope": (16, 56, 56),
         }
 
-        # zero_cond_t was added in diffusers 0.37+; skip it on older versions
+        # zero_cond_t is only used by edit-variant models. It enables dual modulation
+        # for noisy vs reference patches. Setting it on txt2img models produces garbage.
+        # Also requires diffusers 0.37+ (the parameter doesn't exist in older versions).
         import inspect
 
-        if "zero_cond_t" in inspect.signature(QwenImageTransformer2DModel.__init__).parameters:
+        is_edit = getattr(config, "variant", None) == QwenImageVariantType.Edit
+        if is_edit and "zero_cond_t" in inspect.signature(QwenImageTransformer2DModel.__init__).parameters:
             model_config["zero_cond_t"] = True
 
         with accelerate.init_empty_weights():
diff --git a/invokeai/backend/model_manager/starter_models.py b/invokeai/backend/model_manager/starter_models.py
index de5f1e1b8b6..ef7b25431a0 100644
--- a/invokeai/backend/model_manager/starter_models.py
+++ b/invokeai/backend/model_manager/starter_models.py
@@ -650,7 +650,7 @@ class StarterModelBundle(BaseModel):
 # endregion
 
 # region Qwen Image Edit
-qwen_image = StarterModel(
+qwen_image_edit = StarterModel(
     name="Qwen Image Edit 2511",
     base=BaseModelType.QwenImage,
     source="Qwen/Qwen-Image-Edit-2511",
@@ -658,43 +658,43 @@ class StarterModelBundle(BaseModel):
     type=ModelType.Main,
 )
 
-qwen_image_gguf_q4_k_m = StarterModel(
+qwen_image_edit_gguf_q4_k_m = StarterModel(
     name="Qwen Image Edit 2511 (Q4_K_M)",
     base=BaseModelType.QwenImage,
-    source="https://huggingface.co/unsloth/Qwen-Image-Edit-2511-GGUF/resolve/main/qwen-image-2511-Q4_K_M.gguf",
+    source="https://huggingface.co/unsloth/Qwen-Image-Edit-2511-GGUF/resolve/main/qwen-image-edit-2511-Q4_K_M.gguf",
     description="Qwen Image Edit 2511 - Q4_K_M quantized transformer. Good quality/size balance. (~13GB)",
     type=ModelType.Main,
     format=ModelFormat.GGUFQuantized,
 )
 
-qwen_image_gguf_q2_k = StarterModel(
+qwen_image_edit_gguf_q2_k = StarterModel(
     name="Qwen Image Edit 2511 (Q2_K)",
     base=BaseModelType.QwenImage,
-    source="https://huggingface.co/unsloth/Qwen-Image-Edit-2511-GGUF/resolve/main/qwen-image-2511-Q2_K.gguf",
+    source="https://huggingface.co/unsloth/Qwen-Image-Edit-2511-GGUF/resolve/main/qwen-image-edit-2511-Q2_K.gguf",
     description="Qwen Image Edit 2511 - Q2_K heavily quantized transformer. Smallest size, lower quality. (~7.5GB)",
     type=ModelType.Main,
     format=ModelFormat.GGUFQuantized,
 )
 
-qwen_image_gguf_q6_k = StarterModel(
+qwen_image_edit_gguf_q6_k = StarterModel(
     name="Qwen Image Edit 2511 (Q6_K)",
     base=BaseModelType.QwenImage,
-    source="https://huggingface.co/unsloth/Qwen-Image-Edit-2511-GGUF/resolve/main/qwen-image-2511-Q6_K.gguf",
+    source="https://huggingface.co/unsloth/Qwen-Image-Edit-2511-GGUF/resolve/main/qwen-image-edit-2511-Q6_K.gguf",
     description="Qwen Image Edit 2511 - Q6_K quantized transformer. Near-lossless quality. (~17GB)",
     type=ModelType.Main,
     format=ModelFormat.GGUFQuantized,
 )
 
-qwen_image_gguf_q8_0 = StarterModel(
+qwen_image_edit_gguf_q8_0 = StarterModel(
     name="Qwen Image Edit 2511 (Q8_0)",
     base=BaseModelType.QwenImage,
-    source="https://huggingface.co/unsloth/Qwen-Image-Edit-2511-GGUF/resolve/main/qwen-image-2511-Q8_0.gguf",
+    source="https://huggingface.co/unsloth/Qwen-Image-Edit-2511-GGUF/resolve/main/qwen-image-edit-2511-Q8_0.gguf",
     description="Qwen Image Edit 2511 - Q8_0 quantized transformer. Highest quality quantization. (~22GB)",
     type=ModelType.Main,
     format=ModelFormat.GGUFQuantized,
 )
 
-qwen_image_lightning_4step = StarterModel(
+qwen_image_edit_lightning_4step = StarterModel(
     name="Qwen Image Edit Lightning (4-step, bf16)",
     base=BaseModelType.QwenImage,
     source="https://huggingface.co/lightx2v/Qwen-Image-Edit-2511-Lightning/resolve/main/Qwen-Image-Edit-2511-Lightning-4steps-V1.0-bf16.safetensors",
@@ -703,7 +703,7 @@ class StarterModelBundle(BaseModel):
     type=ModelType.LoRA,
 )
 
-qwen_image_lightning_8step = StarterModel(
+qwen_image_edit_lightning_8step = StarterModel(
     name="Qwen Image Edit Lightning (8-step, bf16)",
     base=BaseModelType.QwenImage,
     source="https://huggingface.co/lightx2v/Qwen-Image-Edit-2511-Lightning/resolve/main/Qwen-Image-Edit-2511-Lightning-8steps-V1.0-bf16.safetensors",
@@ -711,6 +711,69 @@ class StarterModelBundle(BaseModel):
     "Settings: Steps=8, CFG=1, Shift Override=3.",
     type=ModelType.LoRA,
 )
+
+# Qwen Image (txt2img)
+qwen_image = StarterModel(
+    name="Qwen Image 2512",
+    base=BaseModelType.QwenImage,
+    source="Qwen/Qwen-Image-2512",
+    description="Qwen Image 2512 full diffusers model. High-quality text-to-image generation. (~40GB)",
+    type=ModelType.Main,
+)
+
+qwen_image_gguf_q4_k_m = StarterModel(
+    name="Qwen Image 2512 (Q4_K_M)",
+    base=BaseModelType.QwenImage,
+    source="https://huggingface.co/unsloth/Qwen-Image-2512-GGUF/resolve/main/qwen-image-2512-Q4_K_M.gguf",
+    description="Qwen Image 2512 - Q4_K_M quantized transformer. Good quality/size balance. (~13GB)",
+    type=ModelType.Main,
+    format=ModelFormat.GGUFQuantized,
+)
+
+qwen_image_gguf_q2_k = StarterModel(
+    name="Qwen Image 2512 (Q2_K)",
+    base=BaseModelType.QwenImage,
+    source="https://huggingface.co/unsloth/Qwen-Image-2512-GGUF/resolve/main/qwen-image-2512-Q2_K.gguf",
+    description="Qwen Image 2512 - Q2_K heavily quantized transformer. Smallest size, lower quality. (~7.5GB)",
+    type=ModelType.Main,
+    format=ModelFormat.GGUFQuantized,
+)
+
+qwen_image_gguf_q6_k = StarterModel(
+    name="Qwen Image 2512 (Q6_K)",
+    base=BaseModelType.QwenImage,
+    source="https://huggingface.co/unsloth/Qwen-Image-2512-GGUF/resolve/main/qwen-image-2512-Q6_K.gguf",
+    description="Qwen Image 2512 - Q6_K quantized transformer. Near-lossless quality. (~17GB)",
+    type=ModelType.Main,
+    format=ModelFormat.GGUFQuantized,
+)
+
+qwen_image_gguf_q8_0 = StarterModel(
+    name="Qwen Image 2512 (Q8_0)",
+    base=BaseModelType.QwenImage,
+    source="https://huggingface.co/unsloth/Qwen-Image-2512-GGUF/resolve/main/qwen-image-2512-Q8_0.gguf",
+    description="Qwen Image 2512 - Q8_0 quantized transformer. Highest quality quantization. (~22GB)",
+    type=ModelType.Main,
+    format=ModelFormat.GGUFQuantized,
+)
+
+qwen_image_lightning_4step = StarterModel(
+    name="Qwen Image Lightning (4-step, V2.0, bf16)",
+    base=BaseModelType.QwenImage,
+    source="https://huggingface.co/lightx2v/Qwen-Image-Lightning/resolve/main/Qwen-Image-Lightning-4steps-V2.0-bf16.safetensors",
+    description="Lightning distillation LoRA for Qwen Image — enables generation in just 4 steps. "
+    "Settings: Steps=4, CFG=1, Shift Override=3.",
+    type=ModelType.LoRA,
+)
+
+qwen_image_lightning_8step = StarterModel(
+    name="Qwen Image Lightning (8-step, V2.0, bf16)",
+    base=BaseModelType.QwenImage,
+    source="https://huggingface.co/lightx2v/Qwen-Image-Lightning/resolve/main/Qwen-Image-Lightning-8steps-V2.0-bf16.safetensors",
+    description="Lightning distillation LoRA for Qwen Image — enables generation in 8 steps with better quality. "
+    "Settings: Steps=8, CFG=1, Shift Override=3.",
+    type=ModelType.LoRA,
+)
 # endregion
 
 # region SigLIP
@@ -1012,6 +1075,13 @@ class StarterModelBundle(BaseModel):
     flux2_klein_qwen3_4b_encoder,
     flux2_klein_qwen3_8b_encoder,
     cogview4,
+    qwen_image_edit,
+    qwen_image_edit_gguf_q2_k,
+    qwen_image_edit_gguf_q4_k_m,
+    qwen_image_edit_gguf_q6_k,
+    qwen_image_edit_gguf_q8_0,
+    qwen_image_edit_lightning_4step,
+    qwen_image_edit_lightning_8step,
     qwen_image,
     qwen_image_gguf_q2_k,
     qwen_image_gguf_q4_k_m,
@@ -1097,9 +1167,13 @@ class StarterModelBundle(BaseModel):
 ]
 
 qwen_image_bundle: list[StarterModel] = [
+    qwen_image_edit,
+    qwen_image_edit_gguf_q4_k_m,
+    qwen_image_edit_gguf_q8_0,
+    qwen_image_edit_lightning_4step,
+    qwen_image_edit_lightning_8step,
     qwen_image,
     qwen_image_gguf_q4_k_m,
-    qwen_image_gguf_q8_0,
     qwen_image_lightning_4step,
     qwen_image_lightning_8step,
 ]
diff --git a/invokeai/backend/model_manager/taxonomy.py b/invokeai/backend/model_manager/taxonomy.py
index 9250310a29a..587c0b0625f 100644
--- a/invokeai/backend/model_manager/taxonomy.py
+++ b/invokeai/backend/model_manager/taxonomy.py
@@ -225,8 +225,28 @@ class FluxLoRAFormat(str, Enum):
 
 
 AnyVariant: TypeAlias = Union[
-    ModelVariantType, ClipVariantType, FluxVariantType, Flux2VariantType, ZImageVariantType, QwenImageVariantType, Qwen3VariantType
+    ModelVariantType,
+    ClipVariantType,
+    FluxVariantType,
+    Flux2VariantType,
+    ZImageVariantType,
+    QwenImageVariantType,
+    Qwen3VariantType,
 ]
 variant_type_adapter = TypeAdapter[
-    ModelVariantType | ClipVariantType | FluxVariantType | Flux2VariantType | ZImageVariantType | QwenImageVariantType | Qwen3VariantType
-](ModelVariantType | ClipVariantType | FluxVariantType | Flux2VariantType | ZImageVariantType | QwenImageVariantType | Qwen3VariantType)
+    ModelVariantType
+    | ClipVariantType
+    | FluxVariantType
+    | Flux2VariantType
+    | ZImageVariantType
+    | QwenImageVariantType
+    | Qwen3VariantType
+](
+    ModelVariantType
+    | ClipVariantType
+    | FluxVariantType
+    | Flux2VariantType
+    | ZImageVariantType
+    | QwenImageVariantType
+    | Qwen3VariantType
+)
diff --git a/invokeai/backend/patches/lora_conversions/qwen_image_lora_conversion_utils.py b/invokeai/backend/patches/lora_conversions/qwen_image_lora_conversion_utils.py
index 7488e0e72e3..df8aa2ef566 100644
--- a/invokeai/backend/patches/lora_conversions/qwen_image_lora_conversion_utils.py
+++ b/invokeai/backend/patches/lora_conversions/qwen_image_lora_conversion_utils.py
@@ -1,9 +1,13 @@
-"""Qwen Image Edit LoRA conversion utilities.
+"""Qwen Image LoRA conversion utilities.
 
-Qwen Image Edit uses QwenImageTransformer2DModel architecture.
-LoRAs follow the standard format with lora_down.weight/lora_up.weight/alpha keys.
+Qwen Image uses QwenImageTransformer2DModel architecture.
+Supports multiple LoRA formats:
+- Diffusers/PEFT: transformer_blocks.0.attn.to_k.lora_down.weight
+- With prefix: transformer.transformer_blocks.0.attn.to_k.lora_down.weight
+- Kohya: lora_unet_transformer_blocks_0_attn_to_k.lora_down.weight (underscores instead of dots)
 """
 
+import re
 from typing import Dict
 
 import torch
@@ -15,23 +19,117 @@
 )
 from invokeai.backend.patches.model_patch_raw import ModelPatchRaw
 
+# Regex for Kohya-format Qwen Image LoRA keys.
+# Example: lora_unet_transformer_blocks_0_attn_to_k
+# Groups: (block_idx, sub_module_with_underscores)
+_KOHYA_KEY_REGEX = re.compile(r"lora_unet_transformer_blocks_(\d+)_(.*)")
+
+# Mapping from Kohya underscore-separated sub-module names to dot-separated model paths.
+# The Kohya format uses underscores everywhere, but some underscores are part of the
+# module name (e.g., add_k_proj, to_out). Each entry is matched exactly, not by prefix.
+_KOHYA_MODULE_MAP: list[tuple[str, str]] = [
+    # Attention projections
+    ("attn_add_k_proj", "attn.add_k_proj"),
+    ("attn_add_q_proj", "attn.add_q_proj"),
+    ("attn_add_v_proj", "attn.add_v_proj"),
+    ("attn_to_add_out", "attn.to_add_out"),
+    ("attn_to_out_0", "attn.to_out.0"),
+    ("attn_to_k", "attn.to_k"),
+    ("attn_to_q", "attn.to_q"),
+    ("attn_to_v", "attn.to_v"),
+    # Image stream MLP and modulation
+    ("img_mlp_net_0_proj", "img_mlp.net.0.proj"),
+    ("img_mlp_net_2", "img_mlp.net.2"),
+    ("img_mod_1", "img_mod.1"),
+    # Text stream MLP and modulation
+    ("txt_mlp_net_0_proj", "txt_mlp.net.0.proj"),
+    ("txt_mlp_net_2", "txt_mlp.net.2"),
+    ("txt_mod_1", "txt_mod.1"),
+]
+
+
+def is_state_dict_likely_kohya_qwen_image(state_dict: dict[str | int, torch.Tensor]) -> bool:
+    """Return True when at least one string key carries the Kohya Qwen Image transformer-block prefix.
+
+    Non-string keys are ignored; an empty state dict (or one without string keys) yields False.
+    """
+    kohya_prefix = "lora_unet_transformer_blocks_"
+    return any(isinstance(key, str) and key.startswith(kohya_prefix) for key in state_dict)
+
+
+def _convert_kohya_key(kohya_layer: str) -> str | None:
+    """Translate a Kohya layer name into the transformer's dot-separated module path.
+
+    Example: lora_unet_transformer_blocks_0_attn_to_k -> transformer_blocks.0.attn.to_k
+
+    Returns None when the name does not match the Kohya pattern or when the sub-module
+    is not listed in _KOHYA_MODULE_MAP, so the caller can skip the layer.
+    """
+    match = _KOHYA_KEY_REGEX.match(kohya_layer)
+    if match is None:
+        return None
+
+    block_idx, sub_module = match.groups()
+    # Sub-module names are compared exactly (not by prefix) against the map entries.
+    model_path = next((path for name, path in _KOHYA_MODULE_MAP if name == sub_module), None)
+    if model_path is None:
+        return None
+    return f"transformer_blocks.{block_idx}.{model_path}"
+
 
 def lora_model_from_qwen_image_state_dict(
     state_dict: Dict[str, torch.Tensor], alpha: float | None = None
 ) -> ModelPatchRaw:
-    """Convert a Qwen Image Edit LoRA state dict to a ModelPatchRaw.
-
-    The Lightning LoRA keys are in the format:
-        transformer_blocks.0.attn.to_k.lora_down.weight
-        transformer_blocks.0.attn.to_k.lora_up.weight
-        transformer_blocks.0.attn.to_k.alpha
+    """Convert a Qwen Image LoRA state dict to a ModelPatchRaw.
 
-    These are already the correct module paths for QwenImageTransformer2DModel.
+    Handles three key formats:
+    - Diffusers/PEFT: transformer_blocks.0.attn.to_k.lora_down.weight
+    - With prefix: transformer.transformer_blocks.0.attn.to_k.lora_down.weight
+    - Kohya: lora_unet_transformer_blocks_0_attn_to_k.lora_down.weight
     """
+    is_kohya = is_state_dict_likely_kohya_qwen_image(state_dict)
+
+    if is_kohya:
+        return _convert_kohya_format(state_dict, alpha)
+    else:
+        return _convert_diffusers_format(state_dict, alpha)
+
+
+def _convert_kohya_format(state_dict: Dict[str, torch.Tensor], alpha: float | None) -> ModelPatchRaw:
+    """Convert a Kohya-format state dict (e.g. lora_unet_transformer_blocks_0_attn_to_k.lokr_w1) to a ModelPatchRaw.
+
+    Keys are split at the first dot into (layer_name, param_name). Non-string keys, keys without a dot,
+    and layers that `_convert_kohya_key` cannot map are skipped. NOTE(review): `alpha` is not used here.
+    """
+    # Group parameters by layer name.
+    grouped: dict[str, dict[str, torch.Tensor]] = {}
+    for key, value in state_dict.items():
+        if not isinstance(key, str):
+            continue
+        layer_name, dot, param_name = key.partition(".")
+        if not dot:
+            continue  # No "layer.param" structure; previously this raised ValueError on unpacking.
+        grouped.setdefault(layer_name, {})[param_name] = value
+
+    layers: dict[str, BaseLayerPatch] = {}
+    for kohya_layer, layer_dict in grouped.items():
+        model_path = _convert_kohya_key(kohya_layer)
+        if model_path is None:
+            continue  # Skip unrecognized layers
+
+        layer = any_lora_layer_from_state_dict(layer_dict)
+        layers[f"{QWEN_IMAGE_EDIT_LORA_TRANSFORMER_PREFIX}{model_path}"] = layer
+
+    return ModelPatchRaw(layers=layers)
+
+
+def _convert_diffusers_format(
+    state_dict: Dict[str, torch.Tensor], alpha: float | None
+) -> ModelPatchRaw:
+    """Convert Diffusers/PEFT format state dict."""
     layers: dict[str, BaseLayerPatch] = {}
 
-    # Some LoRAs use a "transformer." prefix on keys (e.g. "transformer.transformer_blocks.0.attn.to_k")
-    # while the model's module paths start at "transformer_blocks.0.attn.to_k". Strip it if present.
+    # Some LoRAs use a "transformer." prefix on keys
     strip_prefixes = ["transformer."]
 
     grouped = _group_by_layer(state_dict)
diff --git a/invokeai/frontend/web/public/locales/en.json b/invokeai/frontend/web/public/locales/en.json
index 408caecc982..e5121b1cfa5 100644
--- a/invokeai/frontend/web/public/locales/en.json
+++ b/invokeai/frontend/web/public/locales/en.json
@@ -1501,6 +1501,7 @@
             "noFLUXVAEModelSelected": "No VAE model selected for FLUX generation",
             "noCLIPEmbedModelSelected": "No CLIP Embed model selected for FLUX generation",
             "noQwen3EncoderModelSelected": "No Qwen3 Encoder model selected for FLUX2 Klein generation",
+            "noQwenImageComponentSourceSelected": "GGUF Qwen Image models require a Diffusers Component Source for VAE/encoder",
             "noZImageVaeSourceSelected": "No VAE source: Select VAE (FLUX) or Qwen3 Source model",
             "noZImageQwen3EncoderSourceSelected": "No Qwen3 Encoder source: Select Qwen3 Encoder or Qwen3 Source model",
             "fluxModelIncompatibleBboxWidth": "$t(parameters.invoke.fluxRequiresDimensionsToBeMultipleOf16), bbox width is {{width}}",
diff --git a/invokeai/frontend/web/src/features/controlLayers/hooks/addLayerHooks.ts b/invokeai/frontend/web/src/features/controlLayers/hooks/addLayerHooks.ts
index 3cd28b5f2a0..2027ff41741 100644
--- a/invokeai/frontend/web/src/features/controlLayers/hooks/addLayerHooks.ts
+++ b/invokeai/frontend/web/src/features/controlLayers/hooks/addLayerHooks.ts
@@ -80,11 +80,7 @@ export const selectDefaultControlAdapter = createSelector(
 
 export const getDefaultRefImageConfig = (
   getState: AppGetState
-):
-  | IPAdapterConfig
-  | FluxKontextReferenceImageConfig
-  | Flux2ReferenceImageConfig
-  | QwenImageReferenceImageConfig => {
+): IPAdapterConfig | FluxKontextReferenceImageConfig | Flux2ReferenceImageConfig | QwenImageReferenceImageConfig => {
   const state = getState();
 
   const mainModelConfig = selectMainModelConfig(state);
diff --git a/invokeai/frontend/web/src/features/metadata/parsing.tsx b/invokeai/frontend/web/src/features/metadata/parsing.tsx
index 7d1d511a3c2..4f179d6b017 100644
--- a/invokeai/frontend/web/src/features/metadata/parsing.tsx
+++ b/invokeai/frontend/web/src/features/metadata/parsing.tsx
@@ -13,6 +13,9 @@ import {
   kleinVaeModelSelected,
   negativePromptChanged,
   positivePromptChanged,
+  qwenImageComponentSourceSelected,
+  qwenImageQuantizationChanged,
+  qwenImageShiftChanged,
   refinerModelChanged,
   selectBase,
   setCfgRescaleMultiplier,
@@ -677,6 +680,83 @@ const ZImageSeedVarianceRandomizePercent: SingleMetadataHandler<number> = {
 };
 //#endregion ZImageSeedVarianceRandomizePercent
 
+//#region QwenImageComponentSource
+const QwenImageComponentSource: SingleMetadataHandler<ModelIdentifierField | null> = {
+  [SingleMetadataKey]: true,
+  type: 'QwenImageComponentSource',
+  parse: (metadata, _store) => {
+    try {
+      const raw = getProperty(metadata, 'qwen_image_component_source');
+      if (raw === null || raw === undefined) {
+        return Promise.resolve(null);
+      }
+      return Promise.resolve(zModelIdentifierField.parse(raw));
+    } catch {
+      return Promise.resolve(null);
+    }
+  },
+  recall: (value, store) => {
+    store.dispatch(qwenImageComponentSourceSelected(value));
+  },
+  i18nKey: 'modelManager.qwenImageComponentSource',
+  LabelComponent: MetadataLabel,
+  ValueComponent: ({ value }: SingleMetadataValueProps<ModelIdentifierField | null>) => (
+    <MetadataPrimitiveValue value={value ? value.name : 'None'} />
+  ),
+};
+//#endregion QwenImageComponentSource
+
+//#region QwenImageQuantization
+const QwenImageQuantization: SingleMetadataHandler<'none' | 'int8' | 'nf4'> = {
+  [SingleMetadataKey]: true,
+  type: 'QwenImageQuantization',
+  parse: (metadata, _store) => {
+    try {
+      const raw = getProperty(metadata, 'qwen_image_quantization');
+      const parsed = z.enum(['none', 'int8', 'nf4']).parse(raw);
+      return Promise.resolve(parsed);
+    } catch {
+      return Promise.resolve('none' as const);
+    }
+  },
+  recall: (value, store) => {
+    store.dispatch(qwenImageQuantizationChanged(value));
+  },
+  i18nKey: 'modelManager.qwenImageQuantization',
+  LabelComponent: MetadataLabel,
+  ValueComponent: ({ value }: SingleMetadataValueProps<'none' | 'int8' | 'nf4'>) => (
+    <MetadataPrimitiveValue value={value} />
+  ),
+};
+//#endregion QwenImageQuantization
+
+//#region QwenImageShift
+const QwenImageShift: SingleMetadataHandler<number | null> = {
+  [SingleMetadataKey]: true,
+  type: 'QwenImageShift',
+  parse: (metadata, _store) => {
+    try {
+      const raw = getProperty(metadata, 'qwen_image_shift');
+      if (raw === null || raw === undefined) {
+        return Promise.resolve(null);
+      }
+      const parsed = z.number().parse(raw);
+      return Promise.resolve(parsed);
+    } catch {
+      return Promise.resolve(null);
+    }
+  },
+  recall: (value, store) => {
+    store.dispatch(qwenImageShiftChanged(value));
+  },
+  i18nKey: 'modelManager.qwenImageShift',
+  LabelComponent: MetadataLabel,
+  ValueComponent: ({ value }: SingleMetadataValueProps<number | null>) => (
+    <MetadataPrimitiveValue value={value ?? 'Default'} />
+  ),
+};
+//#endregion QwenImageShift
+
 //#region RefinerModel
 const RefinerModel: SingleMetadataHandler<ParameterSDXLRefinerModel> = {
   [SingleMetadataKey]: true,
@@ -1233,6 +1313,9 @@ export const ImageMetadataHandlers = {
   ZImageSeedVarianceEnabled,
   ZImageSeedVarianceStrength,
   ZImageSeedVarianceRandomizePercent,
+  QwenImageComponentSource,
+  QwenImageQuantization,
+  QwenImageShift,
   LoRAs,
   CanvasLayers,
   RefImages,
diff --git a/invokeai/frontend/web/src/features/nodes/types/common.ts b/invokeai/frontend/web/src/features/nodes/types/common.ts
index ca1d42c5a44..10afd6e44bb 100644
--- a/invokeai/frontend/web/src/features/nodes/types/common.ts
+++ b/invokeai/frontend/web/src/features/nodes/types/common.ts
@@ -153,7 +153,7 @@ export const zModelVariantType = z.enum(['normal', 'inpaint', 'depth']);
 export const zFluxVariantType = z.enum(['dev', 'dev_fill', 'schnell']);
 export const zFlux2VariantType = z.enum(['klein_4b', 'klein_9b', 'klein_9b_base']);
 export const zZImageVariantType = z.enum(['turbo', 'zbase']);
-export const zQwenImageVariantType = z.enum(['generate', 'edit']);
+const zQwenImageVariantType = z.enum(['generate', 'edit']);
 export const zQwen3VariantType = z.enum(['qwen3_4b', 'qwen3_8b']);
 export const zAnyModelVariant = z.union([
   zModelVariantType,
diff --git a/invokeai/frontend/web/src/features/nodes/util/graph/generation/buildQwenImageGraph.ts b/invokeai/frontend/web/src/features/nodes/util/graph/generation/buildQwenImageGraph.ts
index e7c04744d4e..336766e5cea 100644
--- a/invokeai/frontend/web/src/features/nodes/util/graph/generation/buildQwenImageGraph.ts
+++ b/invokeai/frontend/web/src/features/nodes/util/graph/generation/buildQwenImageGraph.ts
@@ -15,11 +15,7 @@ import { addQwenImageLoRAs } from 'features/nodes/util/graph/generation/addQwenI
 import { addTextToImage } from 'features/nodes/util/graph/generation/addTextToImage';
 import { addWatermarker } from 'features/nodes/util/graph/generation/addWatermarker';
 import { Graph } from 'features/nodes/util/graph/generation/Graph';
-import {
-  getOriginalAndScaledSizesForTextToImage,
-  selectCanvasOutputFields,
-  selectPresetModifiedPrompts,
-} from 'features/nodes/util/graph/graphBuilderUtils';
+import { selectCanvasOutputFields, selectPresetModifiedPrompts } from 'features/nodes/util/graph/graphBuilderUtils';
 import type { GraphBuilderArg, GraphBuilderReturn, ImageOutputNodes } from 'features/nodes/util/graph/types';
 import { selectActiveTab } from 'features/ui/store/uiSelectors';
 import type { Invocation } from 'services/api/types';
@@ -63,13 +59,16 @@ export const buildQwenImageGraph = async (arg: GraphBuilderArg): Promise<GraphBu
     quantization: params.qwenImageQuantization,
   });
 
-  // Negative conditioning with a blank prompt for CFG
-  const negCond = g.addNode({
-    type: 'qwen_image_text_encoder',
-    id: getPrefixedId('neg_prompt'),
-    prompt: prompts.negative || ' ',
-    quantization: params.qwenImageQuantization,
-  });
+  // Negative conditioning for CFG (only when cfg_scale > 1)
+  const useCfg = typeof cfg_scale === 'number' ? cfg_scale > 1 : true;
+  const negCond = useCfg
+    ? g.addNode({
+        type: 'qwen_image_text_encoder',
+        id: getPrefixedId('neg_prompt'),
+        prompt: prompts.negative || ' ',
+        quantization: params.qwenImageQuantization,
+      })
+    : null;
 
   const seed = g.addNode({
     id: getPrefixedId('seed'),
@@ -89,13 +88,15 @@ export const buildQwenImageGraph = async (arg: GraphBuilderArg): Promise<GraphBu
 
   g.addEdge(modelLoader, 'transformer', denoise, 'transformer');
   g.addEdge(modelLoader, 'qwen_vl_encoder', posCond, 'qwen_vl_encoder');
-  g.addEdge(modelLoader, 'qwen_vl_encoder', negCond, 'qwen_vl_encoder');
   g.addEdge(modelLoader, 'vae', l2i, 'vae');
 
   g.addEdge(positivePrompt, 'value', posCond, 'prompt');
   g.addEdge(posCond, 'conditioning', denoise, 'positive_conditioning');
 
-  g.addEdge(negCond, 'conditioning', denoise, 'negative_conditioning');
+  if (negCond) {
+    g.addEdge(modelLoader, 'qwen_vl_encoder', negCond, 'qwen_vl_encoder');
+    g.addEdge(negCond, 'conditioning', denoise, 'negative_conditioning');
+  }
 
   g.addEdge(seed, 'value', denoise, 'seed');
   g.addEdge(denoise, 'latents', l2i, 'latents');
@@ -103,14 +104,18 @@ export const buildQwenImageGraph = async (arg: GraphBuilderArg): Promise<GraphBu
   // Add Qwen Image Edit LoRAs if any are enabled
   addQwenImageLoRAs(state, g, denoise, modelLoader);
 
-  // Collect enabled Qwen Image Edit reference images that have an image set (image is optional for txt2img)
-  const validRefImageConfigs = selectRefImagesSlice(state).entities.filter(
-    (entity) =>
-      entity.isEnabled &&
-      isQwenImageReferenceImageConfig(entity.config) &&
-      entity.config.image !== null &&
-      getGlobalReferenceImageWarnings(entity, model).length === 0
-  );
+  // Only collect reference images for edit-variant models.
+  // For txt2img (generate) models, reference images are not used even if they exist in state.
+  const isEditModel = 'variant' in model && model.variant === 'edit';
+  const validRefImageConfigs = isEditModel
+    ? selectRefImagesSlice(state).entities.filter(
+        (entity) =>
+          entity.isEnabled &&
+          isQwenImageReferenceImageConfig(entity.config) &&
+          entity.config.image !== null &&
+          getGlobalReferenceImageWarnings(entity, model).length === 0
+      )
+    : [];
 
   if (validRefImageConfigs.length > 0) {
     const refImgCollect = g.addNode({
@@ -135,14 +140,12 @@ export const buildQwenImageGraph = async (arg: GraphBuilderArg): Promise<GraphBu
     const firstImgField = zImageField.parse(
       firstConfig.config.image?.crop?.image ?? firstConfig.config.image?.original.image
     );
-    // Resize the reference image to the generation dimensions before VAE encoding,
-    // matching the diffusers pipeline which resizes in pixel space, not latent space.
-    const { scaledSize } = getOriginalAndScaledSizesForTextToImage(state);
+    // Don't force-resize the reference image to the output dimensions — that would
+    // distort the aspect ratio when they differ. The I2L encodes at the image's
+    // native size; the denoise node handles dimension mismatches via interpolation.
     const refI2l = g.addNode({
       type: 'qwen_image_i2l',
       id: getPrefixedId('qwen_ref_i2l'),
-      width: scaledSize.width,
-      height: scaledSize.height,
     });
     const refImageNode = g.addNode({
       type: 'image',
@@ -163,6 +166,9 @@ export const buildQwenImageGraph = async (arg: GraphBuilderArg): Promise<GraphBu
     cfg_scale,
     negative_prompt: prompts.negative,
     model: Graph.getModelMetadataField(modelConfig),
+    qwen_image_component_source: params.qwenImageComponentSource,
+    qwen_image_quantization: params.qwenImageQuantization,
+    qwen_image_shift: params.qwenImageShift,
     steps,
   });
   g.addEdgeToMetadata(seed, 'value', 'seed');
diff --git a/invokeai/frontend/web/src/features/parameters/components/Advanced/ParamQwenImageQuantization.tsx b/invokeai/frontend/web/src/features/parameters/components/Advanced/ParamQwenImageQuantization.tsx
index 46025d95867..3d086e6ec4a 100644
--- a/invokeai/frontend/web/src/features/parameters/components/Advanced/ParamQwenImageQuantization.tsx
+++ b/invokeai/frontend/web/src/features/parameters/components/Advanced/ParamQwenImageQuantization.tsx
@@ -1,10 +1,7 @@
 import type { ComboboxOnChange, ComboboxOption } from '@invoke-ai/ui-library';
 import { Combobox, FormControl, FormLabel } from '@invoke-ai/ui-library';
 import { useAppDispatch, useAppSelector } from 'app/store/storeHooks';
-import {
-  qwenImageQuantizationChanged,
-  selectQwenImageQuantization,
-} from 'features/controlLayers/store/paramsSlice';
+import { qwenImageQuantizationChanged, selectQwenImageQuantization } from 'features/controlLayers/store/paramsSlice';
 import { memo, useCallback, useMemo } from 'react';
 import { useTranslation } from 'react-i18next';
 
diff --git a/invokeai/frontend/web/src/features/parameters/components/Prompts/Prompts.tsx b/invokeai/frontend/web/src/features/parameters/components/Prompts/Prompts.tsx
index 18f5c4c4dd8..c93841d77b7 100644
--- a/invokeai/frontend/web/src/features/parameters/components/Prompts/Prompts.tsx
+++ b/invokeai/frontend/web/src/features/parameters/components/Prompts/Prompts.tsx
@@ -22,8 +22,11 @@ export const Prompts = memo(() => {
     if (!modelSupportsRefImages) {
       return false;
     }
-    if (modelConfig?.base === 'qwen-image' && 'variant' in modelConfig && modelConfig.variant !== 'edit') {
-      return false;
+    if (modelConfig?.base === 'qwen-image') {
+      const variant = 'variant' in modelConfig ? modelConfig.variant : null;
+      if (variant !== 'edit') {
+        return false;
+      }
     }
     return true;
   }, [modelSupportsRefImages, modelConfig]);
diff --git a/invokeai/frontend/web/src/features/queue/store/readiness.ts b/invokeai/frontend/web/src/features/queue/store/readiness.ts
index 6fc0376208f..3f5a46c6381 100644
--- a/invokeai/frontend/web/src/features/queue/store/readiness.ts
+++ b/invokeai/frontend/web/src/features/queue/store/readiness.ts
@@ -257,6 +257,12 @@ const getReasonsWhyCannotEnqueueGenerateTab = (arg: {
 
   // FLUX.2 (Klein) extracts Qwen3 encoder and VAE from main model - no separate selections needed
 
+  if (model?.base === 'qwen-image' && model.format === 'gguf_quantized') {
+    if (!params.qwenImageComponentSource) {
+      reasons.push({ content: i18n.t('parameters.invoke.noQwenImageComponentSourceSelected') });
+    }
+  }
+
   if (model?.base === 'z-image') {
     // Check if VAE source is available (either separate VAE or Qwen3 Source)
     const hasVaeSource = params.zImageVaeModel !== null || params.zImageQwen3SourceModel !== null;
@@ -680,6 +686,12 @@ const getReasonsWhyCannotEnqueueCanvasTab = (arg: {
     }
   }
 
+  if (model?.base === 'qwen-image' && model.format === 'gguf_quantized') {
+    if (!params.qwenImageComponentSource) {
+      reasons.push({ content: i18n.t('parameters.invoke.noQwenImageComponentSourceSelected') });
+    }
+  }
+
   if (model?.base === 'z-image') {
     // Check if VAE source is available (either separate VAE or Qwen3 Source)
     const hasVaeSource = params.zImageVaeModel !== null || params.zImageQwen3SourceModel !== null;
diff --git a/invokeai/frontend/web/src/services/api/schema.ts b/invokeai/frontend/web/src/services/api/schema.ts
index a23217c3a81..b598719989a 100644
--- a/invokeai/frontend/web/src/services/api/schema.ts
+++ b/invokeai/frontend/web/src/services/api/schema.ts
@@ -18500,8 +18500,7 @@ export type components = {
              * @constant
              */
             base: "qwen-image";
-            /** @default generate */
-            variant: components["schemas"]["QwenImageVariantType"];
+            variant: components["schemas"]["QwenImageVariantType"] | null;
         };
         /** Main_Diffusers_SD1_Config */
         Main_Diffusers_SD1_Config: {
@@ -19234,8 +19233,7 @@ export type components = {
              * @constant
              */
             format: "gguf_quantized";
-            /** @default generate */
-            variant: components["schemas"]["QwenImageVariantType"];
+            variant: components["schemas"]["QwenImageVariantType"] | null;
         };
         /**
          * Main_GGUF_ZImage_Config
@@ -22751,8 +22749,8 @@ export type components = {
             type: "qwen_image_conditioning_output";
         };
         /**
-         * Denoise - Qwen Image Edit
-         * @description Run the denoising process with a Qwen Image Edit model.
+         * Denoise - Qwen Image
+         * @description Run the denoising process with a Qwen Image model.
          */
         QwenImageDenoiseInvocation: {
             /**
@@ -22869,8 +22867,8 @@ export type components = {
             type: "qwen_image_denoise";
         };
         /**
-         * Image to Latents - Qwen Image Edit
-         * @description Generates latents from an image using the Qwen Image Edit VAE.
+         * Image to Latents - Qwen Image
+         * @description Generates latents from an image using the Qwen Image VAE.
          */
         QwenImageImageToLatentsInvocation: {
             /**
@@ -22930,8 +22928,8 @@ export type components = {
             type: "qwen_image_i2l";
         };
         /**
-         * Latents to Image - Qwen Image Edit
-         * @description Generates an image from latents using the Qwen Image Edit VAE.
+         * Latents to Image - Qwen Image
+         * @description Generates an image from latents using the Qwen Image VAE.
          */
         QwenImageLatentsToImageInvocation: {
             /**
@@ -22979,8 +22977,8 @@ export type components = {
             type: "qwen_image_l2i";
         };
         /**
-         * Apply LoRA Collection - Qwen Image Edit
-         * @description Applies a collection of LoRAs to a Qwen Image Edit transformer.
+         * Apply LoRA Collection - Qwen Image
+         * @description Applies a collection of LoRAs to a Qwen Image transformer.
          */
         QwenImageLoRACollectionLoader: {
             /**
@@ -23020,8 +23018,8 @@ export type components = {
             type: "qwen_image_lora_collection_loader";
         };
         /**
-         * Apply LoRA - Qwen Image Edit
-         * @description Apply a LoRA model to a Qwen Image Edit transformer.
+         * Apply LoRA - Qwen Image
+         * @description Apply a LoRA model to a Qwen Image transformer.
          */
         QwenImageLoRALoaderInvocation: {
             /**
@@ -23068,7 +23066,7 @@ export type components = {
         };
         /**
          * QwenImageLoRALoaderOutput
-         * @description Qwen Image Edit LoRA Loader Output
+         * @description Qwen Image LoRA Loader Output
          */
         QwenImageLoRALoaderOutput: {
             /**
@@ -23085,8 +23083,8 @@ export type components = {
             type: "qwen_image_lora_loader_output";
         };
         /**
-         * Main Model - Qwen Image Edit
-         * @description Loads a Qwen Image Edit model, outputting its submodels.
+         * Main Model - Qwen Image
+         * @description Loads a Qwen Image model, outputting its submodels.
          *
          *     The transformer is always loaded from the main model (Diffusers or GGUF).
          *
@@ -23121,7 +23119,7 @@ export type components = {
             model: components["schemas"]["ModelIdentifierField"];
             /**
              * Component Source (Diffusers)
-             * @description Diffusers Qwen Image Edit model to extract the VAE and Qwen VL encoder from. Required when using a GGUF quantized transformer. Ignored when the main model is already in Diffusers format.
+             * @description Diffusers Qwen Image model to extract the VAE and Qwen VL encoder from. Required when using a GGUF quantized transformer. Ignored when the main model is already in Diffusers format.
              * @default null
              */
             component_source?: components["schemas"]["ModelIdentifierField"] | null;
@@ -23134,7 +23132,7 @@ export type components = {
         };
         /**
          * QwenImageModelLoaderOutput
-         * @description Qwen Image Edit base model loader output.
+         * @description Qwen Image model loader output.
          */
         QwenImageModelLoaderOutput: {
             /**
@@ -23160,8 +23158,8 @@ export type components = {
             type: "qwen_image_model_loader_output";
         };
         /**
-         * Prompt - Qwen Image Edit
-         * @description Encodes text and reference images for Qwen Image Edit using Qwen2.5-VL.
+         * Prompt - Qwen Image
+         * @description Encodes text and reference images for Qwen Image using Qwen2.5-VL.
          */
         QwenImageTextEncoderInvocation: {
             /**
diff --git a/invokeai/frontend/web/src/services/api/types.ts b/invokeai/frontend/web/src/services/api/types.ts
index cfeb672d95e..b447f9debbe 100644
--- a/invokeai/frontend/web/src/services/api/types.ts
+++ b/invokeai/frontend/web/src/services/api/types.ts
@@ -330,10 +330,6 @@ export const isQwenImageDiffusersMainModelConfig = (config: AnyModelConfig): con
   return config.type === 'main' && config.base === 'qwen-image' && config.format === 'diffusers';
 };
 
-export const isQwenImageEditMainModelConfig = (config: AnyModelConfig): config is MainModelConfig => {
-  return config.type === 'main' && config.base === 'qwen-image' && 'variant' in config && config.variant === 'edit';
-};
-
 export const isTIModelConfig = (config: AnyModelConfig): config is MainModelConfig => {
   return config.type === 'embedding';
 };
diff --git a/tests/model_identification/stripped_models/f9f3c9fa-9449-4f90-996e-ea6be6b7d233/__test_metadata__.json b/tests/model_identification/stripped_models/f9f3c9fa-9449-4f90-996e-ea6be6b7d233/__test_metadata__.json
new file mode 100644
index 00000000000..5a41ffed04c
--- /dev/null
+++ b/tests/model_identification/stripped_models/f9f3c9fa-9449-4f90-996e-ea6be6b7d233/__test_metadata__.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:32053abc6257adf4771405fddfdaed2b91497c7cd7b0ea6af0aa29f9e008ca2f
+size 233
diff --git a/tests/model_identification/stripped_models/f9f3c9fa-9449-4f90-996e-ea6be6b7d233/qwen_image_kohya_lokr_test.safetensors b/tests/model_identification/stripped_models/f9f3c9fa-9449-4f90-996e-ea6be6b7d233/qwen_image_kohya_lokr_test.safetensors
new file mode 100644
index 00000000000..6e34832a719
--- /dev/null
+++ b/tests/model_identification/stripped_models/f9f3c9fa-9449-4f90-996e-ea6be6b7d233/qwen_image_kohya_lokr_test.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5b3d666baf329c922be86eacd12517cf734514da91377787d2f3cbd2b1a017c0
+size 2910