Files changed (2) hide show
  1. agent.py +323 -0
  2. app.py +64 -84
agent.py ADDED
@@ -0,0 +1,323 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ agent.py — LangChain ReAct agent for the GAIA benchmark.
3
+
4
+ Tools included:
5
+ • DuckDuckGo web search (free, no API key)
6
+ • Wikipedia lookup
7
+ • Python REPL (math, data manipulation)
8
+ • File download + reading (text, CSV, PDF via pdfminer)
9
+ • Image understanding via HuggingFace Inference API (free tier)
10
+ • ArXiv search
11
+ """
12
+
13
+ import os
14
+ import io
15
+ import re
16
+ import tempfile
17
+ import traceback
18
+ from typing import Optional
19
+
20
+ import requests
21
+
22
+ # LangChain core
23
+ from langchain.agents import AgentExecutor, create_react_agent
24
+ from langchain.tools import Tool, tool
25
+ from langchain_core.prompts import PromptTemplate
26
+ from langchain_huggingface import HuggingFaceEndpoint
27
+
28
+ # Community tools
29
+ from langchain_community.tools import DuckDuckGoSearchRun, WikipediaQueryRun
30
+ from langchain_community.utilities import WikipediaAPIWrapper
31
+ from langchain_experimental.tools.python.tool import PythonREPLTool
32
+
33
+
34
+ # ---------------------------------------------------------------------------
35
+ # 1. LLM — free HuggingFace Inference Endpoint
36
+ # ---------------------------------------------------------------------------
37
+
38
def get_llm():
    """
    Build the HuggingFace Inference endpoint client used as the agent's LLM.

    The model is Qwen/Qwen2.5-72B-Instruct, called with the
    "text-generation" task. Any other hosted instruct model
    (e.g. meta-llama/Meta-Llama-3-70B-Instruct) can be substituted by
    changing ``repo_id``.

    Returns:
        A configured ``HuggingFaceEndpoint`` instance.

    Raises:
        EnvironmentError: if the HF_TOKEN environment variable is unset
            or empty.
    """
    token = os.getenv("HF_TOKEN")
    if token:
        # Generation settings are collected in one place so they are easy
        # to tweak without touching the constructor call.
        endpoint_options = {
            "repo_id": "Qwen/Qwen2.5-72B-Instruct",
            "huggingfacehub_api_token": token,
            "task": "text-generation",
            "max_new_tokens": 1024,
            "temperature": 0.1,
            "do_sample": False,
            "repetition_penalty": 1.1,
        }
        return HuggingFaceEndpoint(**endpoint_options)

    raise EnvironmentError(
        "HF_TOKEN environment variable not set. "
        "Add it in your HuggingFace Space secrets."
    )
62
+
63
+
64
+ # ---------------------------------------------------------------------------
65
+ # 2. Tool definitions
66
+ # ---------------------------------------------------------------------------
67
+
68
+ # -- Web search --
69
def make_search_tool():
    """Return the ``web_search`` agent tool, backed by ``DuckDuckGoSearchRun``."""
    # The description is what the agent reads when deciding which tool to call.
    usage = (
        "Search the web for current information using DuckDuckGo. "
        "Use this for facts, recent events, people, places, or anything "
        "that requires up-to-date knowledge. Input: a search query string."
    )
    ddg = DuckDuckGoSearchRun()
    return Tool(name="web_search", func=ddg.run, description=usage)
80
+
81
+
82
+ # -- Wikipedia --
83
def make_wikipedia_tool():
    """Return the ``wikipedia`` agent tool, backed by ``WikipediaQueryRun``."""
    # Wrapper limits: top_k_results=3 and doc_content_chars_max=3000.
    wrapper = WikipediaAPIWrapper(top_k_results=3, doc_content_chars_max=3000)
    lookup = WikipediaQueryRun(api_wrapper=wrapper)
    usage = (
        "Look up encyclopedic information on Wikipedia. "
        "Best for well-known topics, historical facts, science, biographies. "
        "Input: a topic or question string."
    )
    return Tool(name="wikipedia", func=lookup.run, description=usage)
94
+
95
+
96
+ # -- Python REPL --
97
def make_python_tool():
    """Return the ``python_repl`` agent tool, backed by ``PythonREPLTool``."""
    # NOTE(review): this tool executes code written by the LLM; presumably it
    # runs in the same process as the app — confirm the deployment sandboxes it.
    usage = (
        "Execute Python code for calculations, data processing, string manipulation, "
        "logic, and analysis. pandas, math, re, json, csv, datetime are available. "
        "Input: valid Python code as a string. Always print() the result you need."
    )
    interpreter = PythonREPLTool()
    return Tool(name="python_repl", func=interpreter.run, description=usage)
108
+
109
+
110
+ # -- File reader --
111
@tool
def read_file_from_url(url: str) -> str:
    """
    Download a file from a URL and return its text content.
    Supports: plain text (.txt, .py, .json, .csv, .md), PDF, and basic image description.
    Input: a URL string pointing to the file.
    """
    # The docstring above is kept verbatim: the `@tool` decorator exposes it
    # to the LLM as the tool description, so it is effectively runtime text.
    #
    # Dispatch order: PDF -> CSV -> Excel -> image -> plain text. The file type
    # is taken from the Content-Type header first and from the URL path's
    # extension second. Text output is capped (5000 chars, or 50 lines/rows for
    # tabular data). Every failure is returned as a message string rather than
    # raised, so the agent always receives an observation.
    from urllib.parse import urlparse

    try:
        resp = requests.get(url, timeout=30)
        resp.raise_for_status()
        # Header values are case-insensitive; normalise before substring tests.
        content_type = resp.headers.get("content-type", "").lower()
        # Test extensions against the path only, so a query string or fragment
        # (e.g. "report.pdf?download=1") does not hide the extension.
        path = urlparse(url).path.lower()

        # PDF
        if "pdf" in content_type or path.endswith(".pdf"):
            try:
                from pdfminer.high_level import extract_text
            except ImportError:
                return "pdfminer not available. Install pdfminer.six to read PDFs."
            # Parse from memory: no temp file is created, so nothing can be
            # left behind on disk when extraction fails.
            text = extract_text(io.BytesIO(resp.content))
            return text[:5000] if text else "Could not extract PDF text."

        # CSV
        if "csv" in content_type or path.endswith(".csv"):
            decoded = resp.content.decode("utf-8", errors="replace")
            return "\n".join(decoded.splitlines()[:50])  # first 50 lines

        # Excel: also recognised by MIME type, because file URLs without an
        # extension would otherwise fall through to the text branch.
        is_excel = (
            "spreadsheetml" in content_type
            or "ms-excel" in content_type
            or path.endswith((".xlsx", ".xls"))
        )
        if is_excel:
            try:
                import pandas as pd
                df = pd.read_excel(io.BytesIO(resp.content))
                return df.to_string(max_rows=50)
            except Exception as e:
                return f"Could not read Excel file: {e}"

        # Image: describe via HF
        if "image" in content_type or path.endswith((".png", ".jpg", ".jpeg", ".webp", ".gif")):
            return describe_image_bytes(resp.content)

        # Default: text
        return resp.content.decode("utf-8", errors="replace")[:5000]

    except Exception as e:
        return f"Error reading file from {url}: {e}"
162
+
163
+
164
+ # -- Image understanding via HF Inference API --
165
def describe_image_bytes(image_bytes: bytes) -> str:
    """
    Caption an image using the HuggingFace Inference API.

    The raw bytes are POSTed to the Salesforce/blip-image-captioning-large
    model. An ``Authorization`` header is sent only when HF_TOKEN is set.

    Args:
        image_bytes: Encoded image data, exactly as downloaded.

    Returns:
        The generated caption, or a message starting with
        "Image description failed:" when the input is empty, the server
        answers with an error status, or the request raises. A successful
        response that is not a non-empty list of dicts is returned as
        ``str(result)``. This function never raises for request errors.
    """
    # Reject empty input up front instead of spending a network round-trip.
    if not image_bytes:
        return "Image description failed: no image data provided."

    hf_token = os.getenv("HF_TOKEN", "")
    headers = {"Authorization": f"Bearer {hf_token}"} if hf_token else {}

    # Use Salesforce BLIP image captioning (free on HF Inference API)
    api_url = "https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-large"
    try:
        response = requests.post(api_url, headers=headers, data=image_bytes, timeout=30)
        # An error payload must not be handed back as if it were a caption.
        if not response.ok:
            return (
                f"Image description failed: HTTP {response.status_code}: "
                f"{response.text[:200]}"
            )
        result = response.json()
        if isinstance(result, list) and result and isinstance(result[0], dict):
            return result[0].get("generated_text", "No caption generated.")
        return str(result)
    except Exception as e:
        return f"Image description failed: {e}"
182
+
183
+
184
@tool
def describe_image_from_url(url: str) -> str:
    """
    Download an image from a URL and return a text description of its contents.
    Use this when a question refers to an image file.
    Input: a direct URL to an image (jpg, png, webp, etc.).
    """
    # Fetch the image (30 s timeout) and hand the raw bytes to
    # describe_image_bytes. Any exception is converted into a message string
    # so the agent always receives an observation.
    try:
        download = requests.get(url, timeout=30)
        download.raise_for_status()
        image_data = download.content
        return describe_image_bytes(image_data)
    except Exception as err:
        return f"Could not describe image: {err}"
197
+
198
+
199
+ # -- ArXiv search --
200
@tool
def arxiv_search(query: str) -> str:
    """
    Search ArXiv for scientific papers. Use for questions about research, ML papers,
    physics, mathematics, computer science publications.
    Input: a search query string.
    Returns: titles, authors, and abstracts of top results.
    """
    # The docstring above is kept verbatim: the `@tool` decorator exposes it
    # to the LLM as the tool description.
    #
    # Queries the arXiv export API for up to 3 results and formats each as
    # "Title / Authors (first 3) / Abstract (first 500 chars)". Failures are
    # returned as a message string rather than raised.
    import urllib.parse
    import xml.etree.ElementTree as ET

    # The API answers with an Atom feed; every element lives in this namespace.
    atom = "{http://www.w3.org/2005/Atom}"

    def _clean(node) -> str:
        """Return the node's text with all whitespace runs collapsed to one space."""
        if node is None or node.text is None:
            return ""
        return " ".join(node.text.split())

    try:
        encoded = urllib.parse.quote(query)
        url = f"https://export.arxiv.org/api/query?search_query=all:{encoded}&start=0&max_results=3"
        resp = requests.get(url, timeout=15)
        resp.raise_for_status()

        # A real XML parser decodes entities (&amp;, &lt;, ...) and tolerates
        # attributes on tags, which the regex-based approach did not.
        feed = ET.fromstring(resp.content)
        results = []
        for entry in feed.findall(f"{atom}entry"):
            title = _clean(entry.find(f"{atom}title")) or "?"
            summary = _clean(entry.find(f"{atom}summary"))[:500]
            names = [_clean(n) for n in entry.findall(f"{atom}author/{atom}name")]
            authors = ", ".join(names[:3])
            results.append(f"Title: {title}\nAuthors: {authors}\nAbstract: {summary}")
        return "\n\n---\n\n".join(results) if results else "No results found."
    except Exception as e:
        return f"ArXiv search failed: {e}"
230
+
231
+
232
+ # -- Final answer formatter --
233
@tool
def final_answer(answer: str) -> str:
    """
    Use this tool to submit the final answer to the question.
    Input: the exact final answer string.
    """
    # Identity tool: the submitted text is handed back unchanged.
    submitted = answer
    return submitted
240
+
241
+
242
+ # ---------------------------------------------------------------------------
243
+ # 3. ReAct Prompt
244
+ # ---------------------------------------------------------------------------
245
+
246
# Raw ReAct template text. Placeholders filled in at run time:
#   {tools}, {tool_names}  - description and names of the registered tools
#   {input}                - the question being answered
#   {agent_scratchpad}     - the Thought/Action/Observation history so far
_REACT_TEMPLATE = """You are an expert AI assistant solving questions from the GAIA benchmark.
GAIA tests real-world question answering that requires reasoning, web search, file reading, and multi-step problem solving.

You have access to the following tools:
{tools}

Use this format strictly:

Question: the input question you must answer
Thought: reason step-by-step about what to do
Action: the action to take, must be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
... (repeat Thought/Action/Action Input/Observation as needed)
Thought: I now know the final answer
Final Answer: the exact answer to the question

Rules:
- Be precise. GAIA expects exact answers (numbers, names, dates, etc.).
- Use web_search and wikipedia for factual lookups.
- Use python_repl for any calculations, unit conversions, or data analysis.
- Use read_file_from_url if a file URL is provided.
- Use describe_image_from_url if an image URL is provided.
- Use arxiv_search for scientific paper questions.
- If the question asks for a number, return just the number.
- If the question asks for a name, return just the name.
- Do not add explanation to Final Answer — just the answer.
- Limit reasoning to what is necessary.

Begin!

Question: {input}
Thought:{agent_scratchpad}"""

# Prompt object passed to create_react_agent in build_agent().
REACT_PROMPT = PromptTemplate.from_template(_REACT_TEMPLATE)
281
+
282
+
283
+ # ---------------------------------------------------------------------------
284
+ # 4. Build agent
285
+ # ---------------------------------------------------------------------------
286
+
287
def build_agent():
    """
    Assemble the ReAct agent and return a ``question -> answer`` callable.

    The LLM comes from ``get_llm()`` (which raises if HF_TOKEN is missing),
    the tools are web search, Wikipedia, a Python REPL, the file reader,
    the image describer and arXiv search, and the prompt is ``REACT_PROMPT``.

    Returns:
        A function ``run(question: str) -> str``. It returns the executor's
        "output" value stripped of surrounding whitespace, or "Error: ..."
        if the executor raises.
    """
    # Create the model first so a missing token fails before any tool is built.
    model = get_llm()

    toolbox = [
        make_search_tool(),
        make_wikipedia_tool(),
        make_python_tool(),
        read_file_from_url,
        describe_image_from_url,
        arxiv_search,
    ]

    react_agent = create_react_agent(llm=model, tools=toolbox, prompt=REACT_PROMPT)

    # Executor limits: at most 10 iterations and max_execution_time=120.
    runner = AgentExecutor(
        agent=react_agent,
        tools=toolbox,
        verbose=True,
        max_iterations=10,
        max_execution_time=120,
        handle_parsing_errors=True,
        return_intermediate_steps=False,
    )

    def run(question: str) -> str:
        """Answer one question; errors are reported as text, not raised."""
        try:
            outcome = runner.invoke({"input": question})
            answer = outcome.get("output", "No answer produced.")
            return str(answer).strip()
        except Exception as e:
            # Full traceback goes to stdout; only the message is returned.
            print(f"Agent error: {traceback.format_exc()}")
            return f"Error: {e}"

    return run
app.py CHANGED
@@ -1,34 +1,22 @@
1
  import os
2
  import gradio as gr
3
  import requests
4
- import inspect
5
  import pandas as pd
 
6
 
7
- # (Keep Constants as is)
8
  # --- Constants ---
9
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
10
 
11
- # --- Basic Agent Definition ---
12
- # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
13
- class BasicAgent:
14
- def __init__(self):
15
- print("BasicAgent initialized.")
16
- def __call__(self, question: str) -> str:
17
- print(f"Agent received question (first 50 chars): {question[:50]}...")
18
- fixed_answer = "This is a default answer."
19
- print(f"Agent returning fixed answer: {fixed_answer}")
20
- return fixed_answer
21
-
22
- def run_and_submit_all( profile: gr.OAuthProfile | None):
23
  """
24
- Fetches all questions, runs the BasicAgent on them, submits all answers,
25
  and displays the results.
26
  """
27
- # --- Determine HF Space Runtime URL and Repo URL ---
28
- space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
29
 
30
  if profile:
31
- username= f"{profile.username}"
32
  print(f"User logged in: {username}")
33
  else:
34
  print("User not logged in.")
@@ -38,13 +26,13 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
38
  questions_url = f"{api_url}/questions"
39
  submit_url = f"{api_url}/submit"
40
 
41
- # 1. Instantiate Agent ( modify this part to create your agent)
42
  try:
43
- agent = BasicAgent()
44
  except Exception as e:
45
  print(f"Error instantiating agent: {e}")
46
  return f"Error initializing agent: {e}", None
47
- # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
48
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
49
  print(agent_code)
50
 
@@ -55,49 +43,63 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
55
  response.raise_for_status()
56
  questions_data = response.json()
57
  if not questions_data:
58
- print("Fetched questions list is empty.")
59
- return "Fetched questions list is empty or invalid format.", None
60
  print(f"Fetched {len(questions_data)} questions.")
61
- except requests.exceptions.RequestException as e:
62
  print(f"Error fetching questions: {e}")
63
  return f"Error fetching questions: {e}", None
64
- except requests.exceptions.JSONDecodeError as e:
65
- print(f"Error decoding JSON response from questions endpoint: {e}")
66
- print(f"Response text: {response.text[:500]}")
67
- return f"Error decoding server response for questions: {e}", None
68
- except Exception as e:
69
- print(f"An unexpected error occurred fetching questions: {e}")
70
- return f"An unexpected error occurred fetching questions: {e}", None
71
 
72
- # 3. Run your Agent
73
  results_log = []
74
  answers_payload = []
75
  print(f"Running agent on {len(questions_data)} questions...")
 
76
  for item in questions_data:
77
  task_id = item.get("task_id")
78
  question_text = item.get("question")
 
 
79
  if not task_id or question_text is None:
80
  print(f"Skipping item with missing task_id or question: {item}")
81
  continue
 
82
  try:
83
- submitted_answer = agent(question_text)
 
 
 
 
 
 
 
 
84
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
85
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
 
 
 
 
86
  except Exception as e:
87
- print(f"Error running agent on task {task_id}: {e}")
88
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
 
 
 
 
89
 
90
  if not answers_payload:
91
  print("Agent did not produce any answers to submit.")
92
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
93
 
94
- # 4. Prepare Submission
95
- submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
96
- status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
97
- print(status_update)
98
-
99
- # 5. Submit
100
  print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
 
101
  try:
102
  response = requests.post(submit_url, json=submission_data, timeout=60)
103
  response.raise_for_status()
@@ -110,60 +112,42 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
110
  f"Message: {result_data.get('message', 'No message received.')}"
111
  )
112
  print("Submission successful.")
113
- results_df = pd.DataFrame(results_log)
114
- return final_status, results_df
115
  except requests.exceptions.HTTPError as e:
116
  error_detail = f"Server responded with status {e.response.status_code}."
117
  try:
118
  error_json = e.response.json()
119
  error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
120
- except requests.exceptions.JSONDecodeError:
121
  error_detail += f" Response: {e.response.text[:500]}"
122
  status_message = f"Submission Failed: {error_detail}"
123
  print(status_message)
124
- results_df = pd.DataFrame(results_log)
125
- return status_message, results_df
126
- except requests.exceptions.Timeout:
127
- status_message = "Submission Failed: The request timed out."
128
- print(status_message)
129
- results_df = pd.DataFrame(results_log)
130
- return status_message, results_df
131
- except requests.exceptions.RequestException as e:
132
- status_message = f"Submission Failed: Network error - {e}"
133
- print(status_message)
134
- results_df = pd.DataFrame(results_log)
135
- return status_message, results_df
136
  except Exception as e:
137
  status_message = f"An unexpected error occurred during submission: {e}"
138
  print(status_message)
139
- results_df = pd.DataFrame(results_log)
140
- return status_message, results_df
141
 
142
 
143
- # --- Build Gradio Interface using Blocks ---
144
  with gr.Blocks() as demo:
145
- gr.Markdown("# Basic Agent Evaluation Runner")
146
  gr.Markdown(
147
  """
148
- **Instructions:**
149
 
150
- 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
151
- 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
152
- 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
153
 
154
- ---
155
- **Disclaimers:**
156
- Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
157
- This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
158
  """
159
  )
160
 
161
  gr.LoginButton()
162
 
163
- run_button = gr.Button("Run Evaluation & Submit All Answers")
164
-
165
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
166
- # Removed max_rows=10 from DataFrame constructor
167
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
168
 
169
  run_button.click(
@@ -172,25 +156,21 @@ with gr.Blocks() as demo:
172
  )
173
 
174
  if __name__ == "__main__":
175
- print("\n" + "-"*30 + " App Starting " + "-"*30)
176
- # Check for SPACE_HOST and SPACE_ID at startup for information
177
  space_host_startup = os.getenv("SPACE_HOST")
178
- space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
179
 
180
  if space_host_startup:
181
  print(f"✅ SPACE_HOST found: {space_host_startup}")
182
- print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
183
  else:
184
- print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
185
 
186
- if space_id_startup: # Print repo URLs if SPACE_ID is found
187
  print(f"✅ SPACE_ID found: {space_id_startup}")
188
  print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
189
- print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
190
  else:
191
- print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
192
-
193
- print("-"*(60 + len(" App Starting ")) + "\n")
194
 
195
- print("Launching Gradio Interface for Basic Agent Evaluation...")
196
- demo.launch(debug=True, share=False)
 
 
1
  import os
2
  import gradio as gr
3
  import requests
 
4
  import pandas as pd
5
+ from agent import build_agent
6
 
 
7
  # --- Constants ---
8
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
9
 
10
+
11
+ def run_and_submit_all(profile: gr.OAuthProfile | None):
 
 
 
 
 
 
 
 
 
 
12
  """
13
+ Fetches all questions, runs the LangChain agent on them, submits all answers,
14
  and displays the results.
15
  """
16
+ space_id = os.getenv("SPACE_ID")
 
17
 
18
  if profile:
19
+ username = f"{profile.username}"
20
  print(f"User logged in: {username}")
21
  else:
22
  print("User not logged in.")
 
26
  questions_url = f"{api_url}/questions"
27
  submit_url = f"{api_url}/submit"
28
 
29
+ # 1. Instantiate Agent
30
  try:
31
+ agent = build_agent()
32
  except Exception as e:
33
  print(f"Error instantiating agent: {e}")
34
  return f"Error initializing agent: {e}", None
35
+
36
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
37
  print(agent_code)
38
 
 
43
  response.raise_for_status()
44
  questions_data = response.json()
45
  if not questions_data:
46
+ print("Fetched questions list is empty.")
47
+ return "Fetched questions list is empty or invalid format.", None
48
  print(f"Fetched {len(questions_data)} questions.")
49
+ except Exception as e:
50
  print(f"Error fetching questions: {e}")
51
  return f"Error fetching questions: {e}", None
 
 
 
 
 
 
 
52
 
53
+ # 3. Run Agent
54
  results_log = []
55
  answers_payload = []
56
  print(f"Running agent on {len(questions_data)} questions...")
57
+
58
  for item in questions_data:
59
  task_id = item.get("task_id")
60
  question_text = item.get("question")
61
+ file_name = item.get("file_name", "")
62
+
63
  if not task_id or question_text is None:
64
  print(f"Skipping item with missing task_id or question: {item}")
65
  continue
66
+
67
  try:
68
+ # Attach file info to question if present
69
+ if file_name:
70
+ # Try to download the file and pass its URL/info to the agent
71
+ file_url = f"{api_url}/files/{task_id}"
72
+ full_question = f"{question_text}\n\n[Attached file: {file_name}, available at: {file_url}]"
73
+ else:
74
+ full_question = question_text
75
+
76
+ submitted_answer = agent(full_question)
77
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
78
+ results_log.append({
79
+ "Task ID": task_id,
80
+ "Question": question_text,
81
+ "Submitted Answer": submitted_answer
82
+ })
83
  except Exception as e:
84
+ print(f"Error running agent on task {task_id}: {e}")
85
+ results_log.append({
86
+ "Task ID": task_id,
87
+ "Question": question_text,
88
+ "Submitted Answer": f"AGENT ERROR: {e}"
89
+ })
90
 
91
  if not answers_payload:
92
  print("Agent did not produce any answers to submit.")
93
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
94
 
95
+ # 4. Submit
96
+ submission_data = {
97
+ "username": username.strip(),
98
+ "agent_code": agent_code,
99
+ "answers": answers_payload
100
+ }
101
  print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
102
+
103
  try:
104
  response = requests.post(submit_url, json=submission_data, timeout=60)
105
  response.raise_for_status()
 
112
  f"Message: {result_data.get('message', 'No message received.')}"
113
  )
114
  print("Submission successful.")
115
+ return final_status, pd.DataFrame(results_log)
 
116
  except requests.exceptions.HTTPError as e:
117
  error_detail = f"Server responded with status {e.response.status_code}."
118
  try:
119
  error_json = e.response.json()
120
  error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
121
+ except Exception:
122
  error_detail += f" Response: {e.response.text[:500]}"
123
  status_message = f"Submission Failed: {error_detail}"
124
  print(status_message)
125
+ return status_message, pd.DataFrame(results_log)
 
 
 
 
 
 
 
 
 
 
 
126
  except Exception as e:
127
  status_message = f"An unexpected error occurred during submission: {e}"
128
  print(status_message)
129
+ return status_message, pd.DataFrame(results_log)
 
130
 
131
 
132
+ # --- Gradio Interface ---
133
  with gr.Blocks() as demo:
134
+ gr.Markdown("# GAIA Agent Evaluation Runner")
135
  gr.Markdown(
136
  """
137
+ **LangChain-powered agent** with web search, code execution, file reading, and image understanding.
138
 
139
+ **Instructions:**
140
+ 1. Log in to your Hugging Face account using the button below.
141
+ 2. Click **Run Evaluation & Submit All Answers** to fetch questions, run the agent, and submit.
142
 
143
+ > ⏳ This may take several minutes — the agent processes each question using live tools.
 
 
 
144
  """
145
  )
146
 
147
  gr.LoginButton()
148
 
149
+ run_button = gr.Button("Run Evaluation & Submit All Answers", variant="primary")
 
150
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
 
151
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
152
 
153
  run_button.click(
 
156
  )
157
 
158
  if __name__ == "__main__":
159
+ print("\n" + "-" * 30 + " App Starting " + "-" * 30)
 
160
  space_host_startup = os.getenv("SPACE_HOST")
161
+ space_id_startup = os.getenv("SPACE_ID")
162
 
163
  if space_host_startup:
164
  print(f"✅ SPACE_HOST found: {space_host_startup}")
 
165
  else:
166
+ print("ℹ️ SPACE_HOST not found (running locally?).")
167
 
168
+ if space_id_startup:
169
  print(f"✅ SPACE_ID found: {space_id_startup}")
170
  print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
 
171
  else:
172
+ print("ℹ️ SPACE_ID not found (running locally?).")
 
 
173
 
174
+ print("-" * (60 + len(" App Starting ")) + "\n")
175
+ print("Launching Gradio Interface...")
176
+ demo.launch(debug=True, share=False)