forked from rungalileo/sdk-examples
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathhallucination.py
More file actions
106 lines (79 loc) · 3.28 KB
/
Copy pathhallucination.py
File metadata and controls
106 lines (79 loc) · 3.28 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import os
import json
import time
import openai # Using the standard OpenAI library
from galileo import GalileoLogger # Import GalileoLogger for logging
from dotenv import load_dotenv
load_dotenv()
# Initialize the GalileoLogger
logger = GalileoLogger(project="hallucination", log_stream="dev")
# Initialize the standard OpenAI client
client = openai.OpenAI(
api_key=os.environ.get("OPENAI_API_KEY"),
organization=os.environ.get("OPENAI_ORGANIZATION"),
)
# Questions that might lead to hallucinations
QUESTIONS = [
"What is inside a black hole?",
"What will life be like in 100 years?",
"What was the score of the last Lakers game?",
]
# Different system prompts to test hallucination prevention
SYSTEM_PROMPTS = {"unconstrained": "You are a helpful assistant. Answer the user's questions."}
def generate_response(question: str, system_prompt: str) -> str:
"""Generate a response using the OpenAI API with the given system prompt."""
# Record the start time for the LLM call
start_time = time.time_ns()
response = client.chat.completions.create(
model="gpt-4o",
messages=[
{"role": "system", "content": system_prompt},
{"role": "user", "content": question},
],
)
# Record the end time for the LLM call
end_time = time.time_ns()
response_content = response.choices[0].message.content.strip()
# Log the LLM call as a span
logger.add_llm_span(
input=f"System: {system_prompt}\nUser: {question}",
output=response_content,
model="gpt-4o",
duration_ns=end_time - start_time,
num_input_tokens=response.usage.prompt_tokens,
num_output_tokens=response.usage.completion_tokens,
total_tokens=response.usage.total_tokens,
)
return response_content
def run_hallucination_demo():
"""Run the hallucination demonstration with different prompts and questions."""
results = []
for question in QUESTIONS:
# Start a trace for this question
trace = logger.start_trace(input=question)
question_results = {"question": question, "responses": []}
print(f"\n\n{'='*80}\nQUESTION: {question}\n{'='*80}")
for prompt_name, system_prompt in SYSTEM_PROMPTS.items():
# Generate response
response = generate_response(question, system_prompt)
# Store results
question_results["responses"].append({"prompt_type": prompt_name, "response": response})
# Print results for this question
print(f"\n--- PROMPT TYPE: {prompt_name} ---")
print(f"RESPONSE: {response}")
results.append(question_results)
# Conclude the trace with a summary of responses
summary = {
"question": question,
"responses": {k: v["response"] for k, v in enumerate(question_results["responses"])},
}
logger.conclude(output=json.dumps(summary))
return results
if __name__ == "__main__":
print("HALLUCINATION DEMONSTRATION PROGRAM")
print("This program demonstrates how LLMs can hallucinate and how to prevent it.")
print("It shows different prompting strategies to reduce hallucinations.")
# Run the main demonstration
results = run_hallucination_demo()
# Flush the traces to Galileo
logger.flush()