
Streaming responses

Streaming in itty-router can be done by returning the right set of headers and using a TransformStream.
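
The core pattern, stripped of everything OpenAI-specific, looks roughly like this. This is a minimal sketch using a plain itty-router Worker; the '/stream' path, the hard-coded words, and the 100 ms delay are only illustrative, not part of the library's API.

import { Router } from 'itty-router'

const router = Router()

router.get('/stream', () => {
  const { readable, writable } = new TransformStream()
  const writer = writable.getWriter()
  const encoder = new TextEncoder()

  // Write chunks in the background; the client starts receiving them immediately
  const stream = async () => {
    for (const word of ['streaming', ' ', 'works']) {
      await writer.write(encoder.encode(word))
      // illustrative delay so the incremental delivery is visible
      await new Promise((res) => setTimeout(res, 100))
    }
    await writer.close()
  }
  stream()

  // Return the readable side as the response body while writes continue
  return new Response(readable, {
    headers: {
      'content-type': 'text/plain; charset=UTF-8',
      'x-content-type-options': 'nosniff',
      'transfer-encoding': 'chunked',
    },
  })
})

export default {
  fetch: router.handle,
}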

Here's an example of returning a ChatGPT response via streaming: words are sent to the client as soon as they are generated by the model, instead of waiting for the complete response.

import { OpenAPIRoute, OpenAPIRouter, Query } from '@cloudflare/itty-router-openapi'
import { z } from 'zod'
import OpenAI from 'openai'

export class ChatGPT extends OpenAPIRoute {
  static schema = {
    parameters: {
      question: Query(z.string()),
    },
    responses: {
      200: {
        description: 'ChatGPT response',
        contentType: 'text/plain'
      },
    },
  }

  async handle(
    request: Request,
    env: any,
    context: any,
    data: any
  ) {
    const { question } = data.query

    const { readable, writable } = new TransformStream()
    const writer = writable.getWriter()
    const encoder = new TextEncoder()

    const openai = new OpenAI({
      apiKey: 'your-api-key', // in production, read this from a Worker secret instead of hard-coding it
    })

    const response = await openai.chat.completions.create({
      model: 'gpt-3.5-turbo',
      messages: [{
        role: 'user',
        content: question,
      }],
      temperature: 0,
      max_tokens: 256,
      stream: true,  // ask the OpenAI API to stream the response as it is generated
    })

    // Forward the model output to the client as it arrives
    const fn = async () => {
      for await (const message of response) {
        const text = message.choices[0]?.delta?.content ?? ''
        for (const char of text) {
          // stream the text one character at a time
          await writer.write(encoder.encode(char))

          // wait at least 20 milliseconds before sending the next character
          await new Promise((res) => setTimeout(res, 20))
        }
      }
    }

    // When the response is over, close the write stream
    fn().finally(async () => {
      await writer.close()
    })

    return new Response(readable, {
      headers: {  // required headers for streaming responses
        'content-type': 'text/plain; charset=UTF-8',
        'x-content-type-options': 'nosniff',
        'transfer-encoding': 'chunked',
      },
    })
  }
}
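
To serve this route, register the class on an OpenAPIRouter like any other endpoint. A minimal sketch, reusing the OpenAPIRouter import from the example above; the '/chatgpt' path is chosen for illustration:

const router = OpenAPIRouter()
router.get('/chatgpt', ChatGPT)

export default {
  fetch: router.handle,
}

The question is passed as a query parameter (for example GET /chatgpt?question=hello), and a client that reads the response body incrementally, e.g. with response.body.getReader(), will receive the words as they are generated.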