
Commit 4f7f852

Allow ops.stop_gradient() to take a variable. (#20302)
Passing a tensor to `ops.stop_gradient()` always works. Passing a variable directly, however, worked with the TensorFlow backend but failed with an obscure error message on the JAX and PyTorch backends, forcing users to write `ops.stop_gradient(variable.value)`. This commit makes `ops.stop_gradient()` accept variables directly, which is a common use case.
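A minimal sketch of the use case (illustrative, not part of the commit; the shape and initializer here are arbitrary):

```python
import keras
from keras import ops

v = keras.Variable(initializer="ones", shape=(3,))

# Previously required `ops.stop_gradient(v.value)` on JAX and PyTorch;
# after this change, the variable can be passed directly on all backends.
out = ops.stop_gradient(v)
```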
1 parent 79adef4 commit 4f7f852

File tree

keras/src/backend/jax/core.py
keras/src/backend/torch/core.py
keras/src/ops/core_test.py

3 files changed: +5 lines, -1 line

keras/src/backend/jax/core.py

Lines changed: 2 additions & 0 deletions

@@ -341,6 +341,8 @@ def fori_loop(lower, upper, body_fun, init_val):


 def stop_gradient(variable):
+    if isinstance(variable, KerasVariable):
+        variable = variable.value
     return jax.lax.stop_gradient(variable)

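For reference, a small self-contained sketch of the `jax.lax.stop_gradient` semantics this backend function wraps (an illustration, not from the commit): gradients do not flow through the stopped value.

```python
import jax
import jax.numpy as jnp

def f(w):
    # The stopped factor is treated as a constant under differentiation,
    # so the gradient is stop_gradient(w) rather than 2 * w.
    return jnp.sum(w * jax.lax.stop_gradient(w))

print(jax.grad(f)(jnp.ones(3)))  # [1. 1. 1.]
```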
keras/src/backend/torch/core.py

Lines changed: 2 additions & 0 deletions

@@ -645,6 +645,8 @@ def fori_loop(lower, upper, body_fun, init_val):


 def stop_gradient(variable):
+    if isinstance(variable, KerasVariable):
+        variable = variable.value
     # We can't use `.requires_grad_(False)` here since it only
     # works when the tensor is a leaf node in the graph.
     return variable.detach()
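The comment about leaf nodes can be illustrated with a short PyTorch sketch (again illustrative, not part of the commit):

```python
import torch

x = torch.ones(3, requires_grad=True)  # leaf tensor: created by the user
y = x * 2                              # non-leaf: produced by an operation

z = y.detach()  # works on any tensor: returns a view cut from the graph
# y.requires_grad_(False)  # would raise a RuntimeError: the requires_grad
#                          # flag can only be changed on leaf tensors
```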

keras/src/ops/core_test.py

Lines changed: 1 addition & 1 deletion

@@ -565,7 +565,7 @@ def __init__(self):
                 self.b = self.add_weight(shape=(1,), initializer="zeros")

             def call(self, x, training=False):
-                return x * ops.stop_gradient(self.w.value) + self.b
+                return x * ops.stop_gradient(self.w) + self.b

         model = models.Sequential([ExampleLayer()])
         model.compile(
