Spaces:

aryan-coder-25
/

openenv

Running

App Files Files Community

Aarush committed 16 days ago

Commit

25762a1

1 Parent(s): b27731b

feat: Groq API support, nuanced fractional rewards, .env.example

Browse files

Files changed (4) hide show

.env.example +10 -0
.gitignore +1 -0
hybrid_agent.py +11 -4
multi_step_env.py +25 -3

.env.example ADDED Viewed

	@@ -0,0 +1,10 @@

+# ============================================
+# SQL Debug Environment — Configuration
+# ============================================
+# Copy this file to .env and fill in your keys.
+# The .env file is gitignored and will NOT be committed.
+# Optional: Only needed for the Live Dashboard demo agent
+# Supports Groq (free) or OpenAI — set ONE of these:
+GROQ_API_KEY=your_groq_key_here
+# OPENAI_API_KEY=your_openai_key_here

.gitignore CHANGED Viewed

@@ -21,6 +21,7 @@ Thumbs.db
 *.db-wal
 databases/*.db
 baseline_scores.py

 *.db-wal
 databases/*.db
+outputs/trajectories/
 baseline_scores.py

hybrid_agent.py CHANGED Viewed

@@ -52,9 +52,16 @@ class LLMPolicy:
     """
     def __init__(self, model_name="gpt-4o-mini", api_key=None):
         self.model_name = model_name
-        if "gpt" in model_name.lower():
-            if not OpenAI:
-                 raise ImportError("OpenAI SDK not installed. Run: pip install openai")
             # Prevent Streamlit UI crash if key is missing locally by injecting a placeholder
             resolved_key = api_key or os.getenv("OPENAI_API_KEY") or "mock_key_to_prevent_ui_crash"
             self.client = OpenAI(api_key=resolved_key)
@@ -87,7 +94,7 @@ class LLMPolicy:
                 )
                 return response.choices[0].message.content.strip()
             except Exception as e:
-                return "GIVE_UP\n-- Missing valid OPENAI_API_KEY. Add key to environment to enable LLM generation."
         else:
             # Fallback for unconnected local testing
             return "SHOW_TABLES"

     """
     def __init__(self, model_name="gpt-4o-mini", api_key=None):
         self.model_name = model_name
+        if not OpenAI:
+             raise ImportError("OpenAI SDK not installed. Run: pip install openai")
+        if os.getenv("GROQ_API_KEY"):
+            # Use Groq via OpenAI client compatibility
+            if "gpt" in self.model_name.lower():
+                self.model_name = "llama-3.3-70b-versatile" # Map to a strong Groq model
+            resolved_key = api_key or os.getenv("GROQ_API_KEY")
+            self.client = OpenAI(api_key=resolved_key, base_url="https://api.groq.com/openai/v1")
+        elif "gpt" in model_name.lower():
             # Prevent Streamlit UI crash if key is missing locally by injecting a placeholder
             resolved_key = api_key or os.getenv("OPENAI_API_KEY") or "mock_key_to_prevent_ui_crash"
             self.client = OpenAI(api_key=resolved_key)
                 )
                 return response.choices[0].message.content.strip()
             except Exception as e:
+                return f"GIVE_UP\n-- API Error or Missing valid API KEY. Add GROQ_API_KEY or OPENAI_API_KEY to environment. Error: {e}"
         else:
             # Fallback for unconnected local testing
             return "SHOW_TABLES"

multi_step_env.py CHANGED Viewed

@@ -180,16 +180,38 @@ class MultiStepSQLEnv(gym.Env):
                     correctness = metadata.get("correctness", 0.0) if metadata else 0.0
                     if correctness >= 1.0:
-                        reward += 1.0
                         done = True
-                        feedback = "Success! The query produces the correct result set."
                     else:
                         if err:
                             feedback = f"SQL Error: {err}"
                             reward -= 0.05
                         else:
                             feedback = ("Query executed successfully but results are incorrect. "
-                                        f"Correctness score: {correctness}")
             elif command == "GIVE_UP":
                 feedback = "Session aborted by agent."

                     correctness = metadata.get("correctness", 0.0) if metadata else 0.0
                     if correctness >= 1.0:
+                        # --- Nuanced reward: never exactly 0.0 or 1.0 ---
+                        # Base correctness component (max 0.60)
+                        correctness_reward = 0.60
+                        # Exploration bonus (max 0.20): reward agents that investigated first
+                        exploration_actions = sum(
+                            1 for act, _, _ in self.history
+                            if act.strip().upper().startswith(("EXPLAIN", "DESCRIBE", "SHOW_TABLES"))
+                        )
+                        exploration_bonus = min(0.20, exploration_actions * 0.05)
+                        # Efficiency bonus (max 0.15): reward solving in fewer steps
+                        steps_used = self.current_step
+                        efficiency_bonus = max(0.0, (self.max_steps - steps_used) / self.max_steps) * 0.15
+                        # Final reward: fractional, clamped to [0.05, 0.95]
+                        reward += round(min(0.95, max(0.05, correctness_reward + exploration_bonus + efficiency_bonus)), 4)
                         done = True
+                        feedback = (f"Success! The query produces the correct result set. "
+                                    f"(Reward breakdown: correctness={correctness_reward}, "
+                                    f"exploration={round(exploration_bonus, 4)}, "
+                                    f"efficiency={round(efficiency_bonus, 4)})")
                     else:
                         if err:
                             feedback = f"SQL Error: {err}"
                             reward -= 0.05
                         else:
+                            # Partial credit for close attempts
+                            partial = round(correctness * 0.4, 4)
+                            reward += partial
                             feedback = ("Query executed successfully but results are incorrect. "
+                                        f"Correctness score: {correctness}, partial reward: +{partial}")
             elif command == "GIVE_UP":
                 feedback = "Session aborted by agent."