Use this file to discover all available pages before exploring further.
What this builds. Three demos against a single LLM pipeline — (1) two runs sharing a session_id, to show that sessions do NOT carry memory; (2) a manual, client-side history loop that does carry context from one run to the next; (3) node_input_overrides, to swap the LLM system prompt at runtime.
You’ll end up with. Printed results from each run showing that session_id is for tracing only, while client-side history (or node_input_overrides) is what actually changes LLM behaviour.
from typing import Any, cast

from vectorshift.pipeline import Pipeline

# Demo script with three parts, all run against one deployed LLM pipeline:
#   1. two runs that share a session_id (grouping/tracing only, no memory),
#   2. a client-side transcript that is re-sent as the prompt on every run,
#   3. node_input_overrides to replace the LLM system prompt for a single run.

PIPELINE_NAME = "session_overrides_example"
SESSION_ID = "session-001"

# ---------------------------------------------------------------------------
# Setup: obtain the pipeline, wire input -> llm -> output, then deploy.
# ---------------------------------------------------------------------------
# Fetch by name first; on any failure, report it and create a new pipeline.
try:
    pipeline = Pipeline.fetch(name=PIPELINE_NAME)
    print(f"Pipeline fetched: id={pipeline.id}, branch_id={pipeline.branch_id}")
except Exception as fetch_error:
    print(f"Error fetching pipeline: {fetch_error}")
    pipeline = Pipeline.new(name=PIPELINE_NAME)
    print(f"Pipeline created: id={pipeline.id}, branch_id={pipeline.branch_id}")

input_node = pipeline.add(name="input_0", id="input_0")
inp = input_node.input(input_type="string")

llm_node = pipeline.add(name="llm", id="llm")
llm = llm_node.llm(
    provider="openai",
    model="gpt-4o",
    system="You are a helpful assistant.",
    prompt=inp.text,
)

output_node = pipeline.add(name="output_0", id="output_0")
out = output_node.output(output_type="string", value=llm.response)

pipeline.save(deploy=True)


def run_as_dict(**run_kwargs: Any) -> dict[str, Any]:
    """Forward the keyword arguments to ``pipeline.run`` unchanged.

    The ``cast`` only informs static type checkers that the result is treated
    as a dict; it does nothing at runtime.
    """
    return cast(dict[str, Any], pipeline.run(**run_kwargs))


# ---------------------------------------------------------------------------
# Demo 1 - session_id groups runs for tracing; it is not chat memory.
# ---------------------------------------------------------------------------
# Both runs carry the same session_id, yet the second run is not given the
# first run's message, so the LLM has nothing to recall the name from.
result1 = run_as_dict(
    inputs={"input_0": "My name is Alice."},
    session_id=SESSION_ID,
)
print("Run 1:", result1.get("outputs"))

result2 = run_as_dict(
    inputs={"input_0": "What is my name?"},
    session_id=SESSION_ID,
)
# Expect no mention of "Alice" here: session_id does not carry history.
print("Run 2 (no memory):", result2.get("outputs"))

# ---------------------------------------------------------------------------
# Demo 2 - conversation history kept on the client.
# ---------------------------------------------------------------------------
# Conversational context comes from us: keep a transcript and send all of it
# as the prompt input on every run.
history: list[str] = []
prompts = ["My name is Alice.", "What is my name?"]

for message in prompts:
    history.append(f"User: {message}")
    result = run_as_dict(
        inputs={"input_0": "\n".join(history)},
        session_id=SESSION_ID,
    )
    outputs = result.get("outputs", {})
    reply = outputs.get("output_0", "")
    # Record the reply so the next run's prompt includes it.
    history.append(f"Assistant: {reply}")
    print(f"With history: {reply}")

# ---------------------------------------------------------------------------
# Demo 3 - node_input_overrides: replace a node input for one run.
# ---------------------------------------------------------------------------
# Keys take the form "<node id>.<input name>"; this one targets the LLM
# node's system prompt.
pirate_overrides = {
    "llm.system": "You are a pirate. Answer everything like a pirate.",
}
result3 = run_as_dict(
    inputs={"input_0": "What is your favorite thing to do?"},
    node_input_overrides=pirate_overrides,
)
print("Overridden:", result3.get("outputs"))
Error fetching pipeline: ...
Pipeline created: id=..., branch_id=...
Run 1: {'output_0': '...'}
Run 2 (no memory): {'output_0': "I don't know your name..."}
With history: ...
With history: Your name is Alice.
Overridden: {'output_0': 'Arr matey, ...'}
session_id is a tracing/analytics tag — it groups runs in the dashboard but doesn’t feed prior messages back into the LLM. For conversational memory, pass the running transcript as the prompt yourself, or use a Chatbot.