From e022473ae7abeda0bfadbb8924de95ce90b4f859 Mon Sep 17 00:00:00 2001 From: D Gardner Date: Tue, 1 Jul 2025 16:00:59 +0100 Subject: [PATCH 1/2] structured-outputs: support new OpenAI schema constraints --- README.md | 47 +++++ openai-java-core/build.gradle.kts | 2 + .../com/openai/core/JsonSchemaValidator.kt | 69 ++++-- .../com/openai/core/StructuredOutputs.kt | 4 + .../com/openai/core/StructuredOutputsTest.kt | 196 ++++++++++++++++-- .../example/StructuredOutputsExample.java | 4 + 6 files changed, 291 insertions(+), 31 deletions(-) diff --git a/README.md b/README.md index 01b957352..5a181d4f0 100644 --- a/README.md +++ b/README.md @@ -580,6 +580,53 @@ If you use `@JsonProperty(required = false)`, the `false` value will be ignored. must mark all properties as _required_, so the schema generated from your Java classes will respect that restriction and ignore any annotation that would violate it. +You can also use [OpenAPI Swagger 2](https://swagger.io/specification/v2/) +[`@Schema`](https://github.com/swagger-api/swagger-core/wiki/Swagger-2.X---Annotations#schema) and +[`@ArraySchema`](https://github.com/swagger-api/swagger-core/wiki/Swagger-2.X---Annotations#arrayschema) +annotations. These allow type-specific constraints to be added to your schema properties. You can +learn more about the supported constraints in the OpenAI documentation on +[Supported properties](https://platform.openai.com/docs/guides/structured-outputs#supported-properties). + +```java +import io.swagger.v3.oas.annotations.media.Schema; +import io.swagger.v3.oas.annotations.media.ArraySchema; + +class Article { + @ArraySchema(minItems = 1, maxItems = 10) + public List authors; + + @Schema(pattern = "^[A-Za-z ]+$") + public String title; + + @Schema(format = "date") + public String publicationDate; + + @Schema(minimum = "1") + public int pageCount; +} +``` + +Local validation will check that you have not used any unsupported constraint keywords. 
However, the +values of those constraints are _not_ validated locally. For example, if you use a value for the +`"format"` constraint of a string property that is not in the list of supported format names (see +the link to the OpenAI documentation above), then local validation will pass, but the AI model may +report an error. + +If you use both Jackson and Swagger annotations to set the same schema field, the Jackson annotation +will take precedence. In the following example, the description of `myProperty` will be set to +"Jackson description"; "Swagger description" will be ignored: + +```java +import com.fasterxml.jackson.annotation.JsonPropertyDescription; +import io.swagger.v3.oas.annotations.media.Schema; + +class MyObject { + @Schema(description = "Swagger description") + @JsonPropertyDescription("Jackson description") + public String myProperty; +} +``` + ## Function calling with JSON schemas OpenAI [Function Calling](https://platform.openai.com/docs/guides/function-calling?api-mode=chat) diff --git a/openai-java-core/build.gradle.kts b/openai-java-core/build.gradle.kts index 894f0e234..720aa1397 100644 --- a/openai-java-core/build.gradle.kts +++ b/openai-java-core/build.gradle.kts @@ -20,6 +20,7 @@ dependencies { api("com.fasterxml.jackson.core:jackson-core:2.18.2") api("com.fasterxml.jackson.core:jackson-databind:2.18.2") api("com.google.errorprone:error_prone_annotations:2.33.0") + api("io.swagger.core.v3:swagger-annotations:2.2.31") implementation("com.fasterxml.jackson.core:jackson-annotations:2.18.2") implementation("com.fasterxml.jackson.datatype:jackson-datatype-jdk8:2.18.2") @@ -29,6 +30,7 @@ dependencies { implementation("org.apache.httpcomponents.client5:httpclient5:5.3.1") implementation("com.github.victools:jsonschema-generator:4.38.0") implementation("com.github.victools:jsonschema-module-jackson:4.38.0") + implementation("com.github.victools:jsonschema-module-swagger-2:4.38.0") testImplementation(kotlin("test")) 
testImplementation(project(":openai-java-client-okhttp")) diff --git a/openai-java-core/src/main/kotlin/com/openai/core/JsonSchemaValidator.kt b/openai-java-core/src/main/kotlin/com/openai/core/JsonSchemaValidator.kt index a0eff7442..87db4dacd 100644 --- a/openai-java-core/src/main/kotlin/com/openai/core/JsonSchemaValidator.kt +++ b/openai-java-core/src/main/kotlin/com/openai/core/JsonSchemaValidator.kt @@ -38,6 +38,16 @@ internal class JsonSchemaValidator private constructor() { private const val ENUM = "enum" private const val ADDITIONAL_PROPS = "additionalProperties" + private const val PATTERN = "pattern" + private const val FORMAT = "format" + private const val MULTIPLE_OF = "multipleOf" + private const val MINIMUM = "minimum" + private const val EXCLUSIVE_MINIMUM = "exclusiveMinimum" + private const val MAXIMUM = "maximum" + private const val EXCLUSIVE_MAXIMUM = "exclusiveMaximum" + private const val MIN_ITEMS = "minItems" + private const val MAX_ITEMS = "maxItems" + // The names of the supported schema data types. // // JSON Schema does not define an "integer" type, only a "number" type, but it allows any @@ -50,21 +60,8 @@ internal class JsonSchemaValidator private constructor() { private const val TYPE_INTEGER = "integer" private const val TYPE_NULL = "null" - // The validator checks that unsupported type-specific keywords are not present in a - // property node. 
The OpenAI API specification states: - // - // "Notable keywords not supported include: - // - // - For strings: `minLength`, `maxLength`, `pattern`, `format` - // - For numbers: `minimum`, `maximum`, `multipleOf` - // - For objects: `patternProperties`, `unevaluatedProperties`, `propertyNames`, - // `minProperties`, `maxProperties` - // - For arrays: `unevaluatedItems`, `contains`, `minContains`, `maxContains`, `minItems`, - // `maxItems`, `uniqueItems`" - // - // As that list is not exhaustive, and no keywords are explicitly named as supported, this - // validation allows _no_ type-specific keywords. The following sets define the allowed - // keywords in different contexts and all others are rejected. + // The following sets define the allowed keywords in different contexts and all others are + // rejected. /** * The set of allowed keywords in the root schema only, not including the keywords that are @@ -94,14 +91,40 @@ internal class JsonSchemaValidator private constructor() { * The set of allowed keywords when defining sub-schemas when the `"type"` field is set to * `"array"`. */ - private val ALLOWED_KEYWORDS_ARRAY_SUB_SCHEMA = setOf(TYPE, TITLE, DESC, ITEMS) + private val ALLOWED_KEYWORDS_ARRAY_SUB_SCHEMA = + setOf(TYPE, TITLE, DESC, ITEMS, MIN_ITEMS, MAX_ITEMS) /** * The set of allowed keywords when defining sub-schemas when the `"type"` field is set to - * `"boolean"`, `"integer"`, `"number"`, or `"string"`. + * `"boolean"`, or any other simple type not handled separately. */ private val ALLOWED_KEYWORDS_SIMPLE_SUB_SCHEMA = setOf(TYPE, TITLE, DESC, ENUM, CONST) + /** + * The set of allowed keywords when defining sub-schemas when the `"type"` field is set to + * `"string"`. + */ + private val ALLOWED_KEYWORDS_STRING_SUB_SCHEMA = + setOf(TYPE, TITLE, DESC, ENUM, CONST, PATTERN, FORMAT) + + /** + * The set of allowed keywords when defining sub-schemas when the `"type"` field is set to + * `"integer"` or `"number"`. 
+ */ + private val ALLOWED_KEYWORDS_NUMBER_SUB_SCHEMA = + setOf( + TYPE, + TITLE, + DESC, + ENUM, + CONST, + MINIMUM, + EXCLUSIVE_MINIMUM, + MAXIMUM, + EXCLUSIVE_MAXIMUM, + MULTIPLE_OF, + ) + /** * The maximum total length of all strings used in the schema for property names, definition * names, enum values and const values. The OpenAI specification states: @@ -474,7 +497,15 @@ internal class JsonSchemaValidator private constructor() { * value of the non-`"null"` type name. For example `"string"`, or `"number"`. */ private fun validateSimpleSchema(schema: JsonNode, typeName: String, path: String, depth: Int) { - validateKeywords(schema, ALLOWED_KEYWORDS_SIMPLE_SUB_SCHEMA, path, depth) + val allowedKeywords = + when (typeName) { + TYPE_NUMBER, + TYPE_INTEGER -> ALLOWED_KEYWORDS_NUMBER_SUB_SCHEMA + TYPE_STRING -> ALLOWED_KEYWORDS_STRING_SUB_SCHEMA + else -> ALLOWED_KEYWORDS_SIMPLE_SUB_SCHEMA + } + + validateKeywords(schema, allowedKeywords, path, depth) val enumField = schema.get(ENUM) @@ -600,7 +631,7 @@ internal class JsonSchemaValidator private constructor() { /** * Validates that the names of all fields in the given schema node are present in a collection - * of allowed keywords. + * of allowed keywords. The values associated with the keywords are _not_ validated. * * @param depth The nesting depth of the [schema] node. 
If this depth is zero, an additional set * of allowed keywords will be included automatically for keywords that are allowed to appear diff --git a/openai-java-core/src/main/kotlin/com/openai/core/StructuredOutputs.kt b/openai-java-core/src/main/kotlin/com/openai/core/StructuredOutputs.kt index 611e6b076..73502c6d8 100644 --- a/openai-java-core/src/main/kotlin/com/openai/core/StructuredOutputs.kt +++ b/openai-java-core/src/main/kotlin/com/openai/core/StructuredOutputs.kt @@ -12,6 +12,7 @@ import com.github.victools.jsonschema.generator.OptionPreset import com.github.victools.jsonschema.generator.SchemaGenerator import com.github.victools.jsonschema.generator.SchemaGeneratorConfigBuilder import com.github.victools.jsonschema.module.jackson.JacksonModule +import com.github.victools.jsonschema.module.swagger2.Swagger2Module import com.openai.errors.OpenAIInvalidDataException import com.openai.models.FunctionDefinition import com.openai.models.ResponseFormatJsonSchema @@ -201,6 +202,9 @@ internal fun extractSchema(type: Class<*>): ObjectNode { // Use `JacksonModule` to support the use of Jackson annotations to set property and // class names and descriptions and to mark fields with `@JsonIgnore`. .with(JacksonModule()) + // Use `Swagger2Module` to support OpenAPI Swagger 2 `@Schema` annotations to set + // property constraints (e.g., a `"pattern"` constraint for a string property). 
+ .with(Swagger2Module()) configBuilder .forFields() diff --git a/openai-java-core/src/test/kotlin/com/openai/core/StructuredOutputsTest.kt b/openai-java-core/src/test/kotlin/com/openai/core/StructuredOutputsTest.kt index cf5dedd29..6a489d38b 100644 --- a/openai-java-core/src/test/kotlin/com/openai/core/StructuredOutputsTest.kt +++ b/openai-java-core/src/test/kotlin/com/openai/core/StructuredOutputsTest.kt @@ -9,6 +9,8 @@ import com.fasterxml.jackson.databind.JsonNode import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.databind.node.ObjectNode import com.openai.errors.OpenAIInvalidDataException +import io.swagger.v3.oas.annotations.media.ArraySchema +import io.swagger.v3.oas.annotations.media.Schema import java.util.Optional import org.assertj.core.api.Assertions.assertThat import org.assertj.core.api.Assertions.assertThatNoException @@ -494,20 +496,16 @@ internal class StructuredOutputsTest { @Test fun schemaTest_unsupportedKeywords() { - // OpenAI lists a set of keywords that are not allowed, but the set is not exhaustive. Check - // that everything named in that set is identified as not allowed, as that is the minimum - // level of validation expected. Check at the root schema and a sub-schema. There is no need - // to match the keywords to their expected schema types or be concerned about the values of - // the keyword fields, which makes testing easier. + // OpenAI lists a set of keywords that are allowed (for non "fine-tuned" models). Check that + // a selection of keywords that are not listed as supported are identified as not allowed. + // Check at the root schema and a sub-schema. There is no need to match the keywords to + // their expected schema types or be concerned about the values of the keyword fields, which + // makes testing easier. Supported keywords are tested elsewhere (mostly when applied via + // annotations). 
val keywordsNotAllowed = listOf( "minLength", "maxLength", - "pattern", - "format", - "minimum", - "maximum", - "multipleOf", "patternProperties", "unevaluatedProperties", "propertyNames", @@ -517,8 +515,6 @@ internal class StructuredOutputsTest { "contains", "minContains", "maxContains", - "minItems", - "maxItems", "uniqueItems", ) val notAllowedUses = keywordsNotAllowed.joinToString(", ") { "\"$it\" : \"\"" } @@ -1359,6 +1355,182 @@ internal class StructuredOutputsTest { assertThat(s2Property).isNotNull } + @Test + fun schemaTest_annotatedWithSchemaStringConstraints() { + @Suppress("unused") + class X(@get:Schema(pattern = "^[a-z]+$", format = "email") val s: String) + + schema = extractSchema(X::class.java) + validator.validate(schema) + + val properties = schema.get("properties") + val stringProperty = properties.get("s") + + assertThat(validator.isValid()).isTrue + assertThat(stringProperty).isNotNull + + assertThat(stringProperty.get("pattern")).isNotNull + assertThat(stringProperty.get("pattern").isTextual).isTrue + assertThat(stringProperty.get("pattern").asText()).isEqualTo("^[a-z]+$") + + assertThat(stringProperty.get("format")).isNotNull + assertThat(stringProperty.get("format").isTextual).isTrue + assertThat(stringProperty.get("format").asText()).isEqualTo("email") + } + + @Test + fun schemaTest_annotatedWithSchemaNumberConstraints() { + @Suppress("unused") + class X(@get:Schema(multipleOf = 5.0, minimum = "10.0", maximum = "55.0") val d: Double) + + schema = extractSchema(X::class.java) + validator.validate(schema) + + val properties = schema.get("properties") + val numberProperty = properties.get("d") + + assertThat(validator.isValid()).isTrue + assertThat(numberProperty).isNotNull + + assertThat(numberProperty.get("multipleOf")).isNotNull + assertThat(numberProperty.get("multipleOf").isNumber).isTrue + assertThat(numberProperty.get("multipleOf").asDouble()).isEqualTo(5.0) + + assertThat(numberProperty.get("minimum")).isNotNull + 
assertThat(numberProperty.get("minimum").isNumber).isTrue + assertThat(numberProperty.get("minimum").asDouble()).isEqualTo(10.0) + + assertThat(numberProperty.get("maximum")).isNotNull + assertThat(numberProperty.get("maximum").isNumber).isTrue + assertThat(numberProperty.get("maximum").asDouble()).isEqualTo(55.0) + } + + @Test + fun schemaTest_annotatedWithSchemaNumberConstraintsExclusive() { + @Suppress("unused") + class X( + @get:Schema( + multipleOf = 5.0, + minimum = "10.0", + exclusiveMinimum = true, + maximum = "55.0", + exclusiveMaximum = true, + ) + val d: Double + ) + + schema = extractSchema(X::class.java) + validator.validate(schema) + + val properties = schema.get("properties") + val numberProperty = properties.get("d") + + assertThat(validator.isValid()).isTrue + assertThat(numberProperty).isNotNull + + assertThat(numberProperty.get("multipleOf")).isNotNull + assertThat(numberProperty.get("multipleOf").isNumber).isTrue + assertThat(numberProperty.get("multipleOf").asDouble()).isEqualTo(5.0) + + // The pairing of `minimum` and `exclusiveMinimum` in the annotation is reduced to a single + // `"exclusiveMinimum"` field in the schema with a numeric value. Same for the maximum. 
+ assertThat(numberProperty.get("exclusiveMinimum")).isNotNull + assertThat(numberProperty.get("exclusiveMinimum").isNumber).isTrue + assertThat(numberProperty.get("exclusiveMinimum").asDouble()).isEqualTo(10.0) + + assertThat(numberProperty.get("exclusiveMaximum")).isNotNull + assertThat(numberProperty.get("exclusiveMaximum").isNumber).isTrue + assertThat(numberProperty.get("exclusiveMaximum").asDouble()).isEqualTo(55.0) + } + + @Test + fun schemaTest_annotatedWithSchemaIntegerConstraints() { + @Suppress("unused") + class X(@get:Schema(multipleOf = 5.0, minimum = "10", maximum = "55") val i: Int) + + schema = extractSchema(X::class.java) + validator.validate(schema) + + val properties = schema.get("properties") + val integerProperty = properties.get("i") + + assertThat(validator.isValid()).isTrue + assertThat(integerProperty).isNotNull + + assertThat(integerProperty.get("multipleOf")).isNotNull + assertThat(integerProperty.get("multipleOf").isNumber).isTrue + assertThat(integerProperty.get("multipleOf").asDouble()).isEqualTo(5.0) + + assertThat(integerProperty.get("minimum")).isNotNull + assertThat(integerProperty.get("minimum").isNumber).isTrue + assertThat(integerProperty.get("minimum").asInt()).isEqualTo(10) + + assertThat(integerProperty.get("maximum")).isNotNull + assertThat(integerProperty.get("maximum").isNumber).isTrue + assertThat(integerProperty.get("maximum").asInt()).isEqualTo(55) + } + + @Test + fun schemaTest_annotatedWithSchemaIntegerConstraintsExclusive() { + @Suppress("unused") + class X( + @get:Schema( + multipleOf = 5.0, + minimum = "10", + exclusiveMinimum = true, + maximum = "55", + exclusiveMaximum = true, + ) + val i: Int + ) + + schema = extractSchema(X::class.java) + validator.validate(schema) + + val properties = schema.get("properties") + val integerProperty = properties.get("i") + + assertThat(validator.isValid()).isTrue + assertThat(integerProperty).isNotNull + + assertThat(integerProperty.get("multipleOf")).isNotNull + 
assertThat(integerProperty.get("multipleOf").isNumber).isTrue + assertThat(integerProperty.get("multipleOf").asDouble()).isEqualTo(5.0) + + // The pairing of `minimum` and `exclusiveMinimum` in the annotation is reduced to a single + // `"exclusiveMinimum"` field in the schema with a numeric value. Same for the maximum. + assertThat(integerProperty.get("exclusiveMinimum")).isNotNull + assertThat(integerProperty.get("exclusiveMinimum").isNumber).isTrue + assertThat(integerProperty.get("exclusiveMinimum").asInt()).isEqualTo(10) + + assertThat(integerProperty.get("exclusiveMaximum")).isNotNull + assertThat(integerProperty.get("exclusiveMaximum").isNumber).isTrue + assertThat(integerProperty.get("exclusiveMaximum").asInt()).isEqualTo(55) + } + + @Test + fun schemaTest_annotatedWithArraySchemaArrayConstraints() { + @Suppress("unused") + class X(@get:ArraySchema(minItems = 11, maxItems = 42) val a: List) + + schema = extractSchema(X::class.java) + validator.validate(schema) + + val properties = schema.get("properties") + val arrayProperty = properties.get("a") + + assertThat(validator.isValid()).isTrue + assertThat(arrayProperty).isNotNull + + assertThat(arrayProperty.get("minItems")).isNotNull + assertThat(arrayProperty.get("minItems").isInt).isTrue + assertThat(arrayProperty.get("minItems").asInt()).isEqualTo(11) + + assertThat(arrayProperty.get("maxItems")).isNotNull + assertThat(arrayProperty.get("maxItems").isInt).isTrue + assertThat(arrayProperty.get("maxItems").asInt()).isEqualTo(42) + } + @Test fun schemaTest_emptyDefinitions() { // Be lenient about empty definitions. 
diff --git a/openai-java-example/src/main/java/com/openai/example/StructuredOutputsExample.java b/openai-java-example/src/main/java/com/openai/example/StructuredOutputsExample.java index b4af99997..9f241ffc4 100644 --- a/openai-java-example/src/main/java/com/openai/example/StructuredOutputsExample.java +++ b/openai-java-example/src/main/java/com/openai/example/StructuredOutputsExample.java @@ -7,6 +7,8 @@ import com.openai.models.ChatModel; import com.openai.models.chat.completions.ChatCompletionCreateParams; import com.openai.models.chat.completions.StructuredChatCompletionCreateParams; +import io.swagger.v3.oas.annotations.media.ArraySchema; +import io.swagger.v3.oas.annotations.media.Schema; import java.util.List; public final class StructuredOutputsExample { @@ -32,6 +34,7 @@ public static class Book { public Person author; @JsonPropertyDescription("The year in which the book was first published.") + @Schema(minimum = "1500") public int publicationYear; public String genre; @@ -46,6 +49,7 @@ public String toString() { } public static class BookList { + @ArraySchema(maxItems = 100) public List books; } From 8db9e3635f7a86c16bff66cf154cc52f7879d3f0 Mon Sep 17 00:00:00 2001 From: D Gardner Date: Tue, 1 Jul 2025 17:17:49 +0100 Subject: [PATCH 2/2] structured-outputs: JSON schema constraints doc link --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 5a181d4f0..5698fa6ad 100644 --- a/README.md +++ b/README.md @@ -607,10 +607,10 @@ class Article { ``` Local validation will check that you have not used any unsupported constraint keywords. However, the -values of those constraints are _not_ validated locally. For example, if you use a value for the -`"format"` constraint of a string property that is not in the list of supported format names (see -the link to the OpenAI documentation above), then local validation will pass, but the AI model may -report an error. +values of those constraints are _not_ validated locally. 
For example, if you use a value for the `"format"` +constraint of a string property that is not in the list of +[supported format names](https://platform.openai.com/docs/guides/structured-outputs#supported-properties), +then local validation will pass, but the AI model may report an error. If you use both Jackson and Swagger annotations to set the same schema field, the Jackson annotation will take precedence. In the following example, the description of `myProperty` will be set to