diff --git a/src/core.ts b/src/core.ts index 0dedc53eb..a3f664906 100644 --- a/src/core.ts +++ b/src/core.ts @@ -1287,6 +1287,27 @@ export const toBase64 = (str: string | null | undefined): string => { throw new OpenAIError('Cannot generate b64 string; Expected `Buffer` or `btoa` to be defined'); }; +/** + * Converts a Base64 encoded string to a Float32Array. + * @param base64Str - The Base64 encoded string. + * @returns An Array of numbers interpreted as Float32 values. + */ +export const toFloat32Array = (base64Str: string): Array => { + if (typeof Buffer !== 'undefined') { + // for Node.js environment + return Array.from(new Float32Array(Buffer.from(base64Str, 'base64').buffer)); + } else { + // for legacy web platform APIs + const binaryStr = atob(base64Str); + const len = binaryStr.length; + const bytes = new Uint8Array(len); + for (let i = 0; i < len; i++) { + bytes[i] = binaryStr.charCodeAt(i); + } + return Array.from(new Float32Array(bytes.buffer)); + } +}; + export function isObj(obj: unknown): obj is Record { return obj != null && typeof obj === 'object' && !Array.isArray(obj); } diff --git a/src/resources/embeddings.ts b/src/resources/embeddings.ts index d01ffc807..a4be9ca3c 100644 --- a/src/resources/embeddings.ts +++ b/src/resources/embeddings.ts @@ -9,9 +9,47 @@ export class Embeddings extends APIResource { */ create( body: EmbeddingCreateParams, - options?: Core.RequestOptions, + options?: Core.RequestOptions, ): Core.APIPromise { - return this._client.post('/embeddings', { body, ...options }); + const hasUserProvidedEncodingFormat = !!body.encoding_format; + // No encoding_format specified, defaulting to base64 for performance reasons + // See https://github.com/openai/openai-node/pull/1312 + let encoding_format: EmbeddingCreateParams['encoding_format'] = + hasUserProvidedEncodingFormat ? body.encoding_format : 'base64'; + + if (hasUserProvidedEncodingFormat) { + Core.debug('Request', 'User defined encoding_format:', body.encoding_format); + } + + const response: Core.APIPromise = this._client.post('/embeddings', { + body: { + ...body, + encoding_format: encoding_format as EmbeddingCreateParams['encoding_format'], + }, + ...options, + }); + + // if the user specified an encoding_format, return the response as-is + if (hasUserProvidedEncodingFormat) { + return response; + } + + // in this stage, we are sure the user did not specify an encoding_format + // and we defaulted to base64 for performance reasons + // we are sure then that the response is base64 encoded, let's decode it + // the returned result will be a float32 array since this is OpenAI API's default encoding + Core.debug('response', 'Decoding base64 embeddings to float32 array'); + + return (response as Core.APIPromise)._thenUnwrap((response) => { + if (response && response.data) { + response.data.forEach((embeddingBase64Obj) => { + const embeddingBase64Str = embeddingBase64Obj.embedding as unknown as string; + embeddingBase64Obj.embedding = Core.toFloat32Array(embeddingBase64Str); + }); + } + + return response; + }); } } diff --git a/tests/api-resources/embeddings.test.ts b/tests/api-resources/embeddings.test.ts index 46dd1b2a3..e226ade9e 100644 --- a/tests/api-resources/embeddings.test.ts +++ b/tests/api-resources/embeddings.test.ts @@ -32,4 +32,35 @@ describe('resource embeddings', () => { user: 'user-1234', }); }); + + test('create: encoding_format=float should create float32 embeddings', async () => { + const response = await client.embeddings.create({ + input: 'The quick brown fox jumped over the lazy dog', + model: 'text-embedding-3-small', + }); + + expect(response.data?.at(0)?.embedding).toBeInstanceOf(Array); + expect(Number.isFinite(response.data?.at(0)?.embedding.at(0))).toBe(true); + }); + + test('create: encoding_format=base64 should create float32 embeddings', async () => { + const response = await client.embeddings.create({ + input: 'The quick brown fox jumped over the lazy dog', + model: 'text-embedding-3-small', + encoding_format: 'base64', + }); + + expect(response.data?.at(0)?.embedding).toBeInstanceOf(Array); + expect(Number.isFinite(response.data?.at(0)?.embedding.at(0))).toBe(true); + }); + + test('create: encoding_format=default should create float32 embeddings', async () => { + const response = await client.embeddings.create({ + input: 'The quick brown fox jumped over the lazy dog', + model: 'text-embedding-3-small', + }); + + expect(response.data?.at(0)?.embedding).toBeInstanceOf(Array); + expect(Number.isFinite(response.data?.at(0)?.embedding.at(0))).toBe(true); + }); });