diff --git a/README.md b/README.md index 01b95735..5698fa6a 100644 --- a/README.md +++ b/README.md @@ -580,6 +580,53 @@ If you use `@JsonProperty(required = false)`, the `false` value will be ignored. must mark all properties as _required_, so the schema generated from your Java classes will respect that restriction and ignore any annotation that would violate it. +You can also use [OpenAPI Swagger 2](https://swagger.io/specification/v2/) +[`@Schema`](https://github.com/swagger-api/swagger-core/wiki/Swagger-2.X---Annotations#schema) and +[`@ArraySchema`](https://github.com/swagger-api/swagger-core/wiki/Swagger-2.X---Annotations#arrayschema) +annotations. These allow type-specific constraints to be added to your schema properties. You can +learn more about the supported constraints in the OpenAI documentation on +[Supported properties](https://platform.openai.com/docs/guides/structured-outputs#supported-properties). + +```java +import io.swagger.v3.oas.annotations.media.Schema; +import io.swagger.v3.oas.annotations.media.ArraySchema; + +class Article { + @ArraySchema(minItems = 1, maxItems = 10) + public List authors; + + @Schema(pattern = "^[A-Za-z ]+$") + public String title; + + @Schema(format = "date") + public String publicationDate; + + @Schema(minimum = "1") + public int pageCount; +} +``` + +Local validation will check that you have not used any unsupported constraint keywords. However, the +values of the constraints are _not_ validated locally. For example, if you use a value for the `"format"` +constraint of a string property that is not in the list of +[supported format names](https://platform.openai.com/docs/guides/structured-outputs#supported-properties), +then local validation will pass, but the AI model may report an error. + +If you use both Jackson and Swagger annotations to set the same schema field, the Jackson annotation +will take precedence. 
In the following example, the description of `myProperty` will be set to +"Jackson description"; "Swagger description" will be ignored: + +```java +import com.fasterxml.jackson.annotation.JsonPropertyDescription; +import io.swagger.v3.oas.annotations.media.Schema; + +class MyObject { + @Schema(description = "Swagger description") + @JsonPropertyDescription("Jackson description") + public String myProperty; +} +``` + ## Function calling with JSON schemas OpenAI [Function Calling](https://platform.openai.com/docs/guides/function-calling?api-mode=chat) diff --git a/openai-java-core/build.gradle.kts b/openai-java-core/build.gradle.kts index 894f0e23..720aa139 100644 --- a/openai-java-core/build.gradle.kts +++ b/openai-java-core/build.gradle.kts @@ -20,6 +20,7 @@ dependencies { api("com.fasterxml.jackson.core:jackson-core:2.18.2") api("com.fasterxml.jackson.core:jackson-databind:2.18.2") api("com.google.errorprone:error_prone_annotations:2.33.0") + api("io.swagger.core.v3:swagger-annotations:2.2.31") implementation("com.fasterxml.jackson.core:jackson-annotations:2.18.2") implementation("com.fasterxml.jackson.datatype:jackson-datatype-jdk8:2.18.2") @@ -29,6 +30,7 @@ dependencies { implementation("org.apache.httpcomponents.client5:httpclient5:5.3.1") implementation("com.github.victools:jsonschema-generator:4.38.0") implementation("com.github.victools:jsonschema-module-jackson:4.38.0") + implementation("com.github.victools:jsonschema-module-swagger-2:4.38.0") testImplementation(kotlin("test")) testImplementation(project(":openai-java-client-okhttp")) diff --git a/openai-java-core/src/main/kotlin/com/openai/core/JsonSchemaValidator.kt b/openai-java-core/src/main/kotlin/com/openai/core/JsonSchemaValidator.kt index a0eff744..87db4dac 100644 --- a/openai-java-core/src/main/kotlin/com/openai/core/JsonSchemaValidator.kt +++ b/openai-java-core/src/main/kotlin/com/openai/core/JsonSchemaValidator.kt @@ -38,6 +38,16 @@ internal class JsonSchemaValidator private constructor() { 
private const val ENUM = "enum" private const val ADDITIONAL_PROPS = "additionalProperties" + private const val PATTERN = "pattern" + private const val FORMAT = "format" + private const val MULTIPLE_OF = "multipleOf" + private const val MINIMUM = "minimum" + private const val EXCLUSIVE_MINIMUM = "exclusiveMinimum" + private const val MAXIMUM = "maximum" + private const val EXCLUSIVE_MAXIMUM = "exclusiveMaximum" + private const val MIN_ITEMS = "minItems" + private const val MAX_ITEMS = "maxItems" + // The names of the supported schema data types. // // JSON Schema does not define an "integer" type, only a "number" type, but it allows any @@ -50,21 +60,8 @@ internal class JsonSchemaValidator private constructor() { private const val TYPE_INTEGER = "integer" private const val TYPE_NULL = "null" - // The validator checks that unsupported type-specific keywords are not present in a - // property node. The OpenAI API specification states: - // - // "Notable keywords not supported include: - // - // - For strings: `minLength`, `maxLength`, `pattern`, `format` - // - For numbers: `minimum`, `maximum`, `multipleOf` - // - For objects: `patternProperties`, `unevaluatedProperties`, `propertyNames`, - // `minProperties`, `maxProperties` - // - For arrays: `unevaluatedItems`, `contains`, `minContains`, `maxContains`, `minItems`, - // `maxItems`, `uniqueItems`" - // - // As that list is not exhaustive, and no keywords are explicitly named as supported, this - // validation allows _no_ type-specific keywords. The following sets define the allowed - // keywords in different contexts and all others are rejected. + // The following sets define the allowed keywords in different contexts and all others are + // rejected. 
/** * The set of allowed keywords in the root schema only, not including the keywords that are @@ -94,14 +91,40 @@ internal class JsonSchemaValidator private constructor() { * The set of allowed keywords when defining sub-schemas when the `"type"` field is set to * `"array"`. */ - private val ALLOWED_KEYWORDS_ARRAY_SUB_SCHEMA = setOf(TYPE, TITLE, DESC, ITEMS) + private val ALLOWED_KEYWORDS_ARRAY_SUB_SCHEMA = + setOf(TYPE, TITLE, DESC, ITEMS, MIN_ITEMS, MAX_ITEMS) /** * The set of allowed keywords when defining sub-schemas when the `"type"` field is set to - * `"boolean"`, `"integer"`, `"number"`, or `"string"`. + * `"boolean"`, or any other simple type not handled separately. */ private val ALLOWED_KEYWORDS_SIMPLE_SUB_SCHEMA = setOf(TYPE, TITLE, DESC, ENUM, CONST) + /** + * The set of allowed keywords when defining sub-schemas when the `"type"` field is set to + * `"string"`. + */ + private val ALLOWED_KEYWORDS_STRING_SUB_SCHEMA = + setOf(TYPE, TITLE, DESC, ENUM, CONST, PATTERN, FORMAT) + + /** + * The set of allowed keywords when defining sub-schemas when the `"type"` field is set to + * `"integer"` or `"number"`. + */ + private val ALLOWED_KEYWORDS_NUMBER_SUB_SCHEMA = + setOf( + TYPE, + TITLE, + DESC, + ENUM, + CONST, + MINIMUM, + EXCLUSIVE_MINIMUM, + MAXIMUM, + EXCLUSIVE_MAXIMUM, + MULTIPLE_OF, + ) + /** * The maximum total length of all strings used in the schema for property names, definition * names, enum values and const values. The OpenAI specification states: @@ -474,7 +497,15 @@ internal class JsonSchemaValidator private constructor() { * value of the non-`"null"` type name. For example `"string"`, or `"number"`. 
*/ private fun validateSimpleSchema(schema: JsonNode, typeName: String, path: String, depth: Int) { - validateKeywords(schema, ALLOWED_KEYWORDS_SIMPLE_SUB_SCHEMA, path, depth) + val allowedKeywords = + when (typeName) { + TYPE_NUMBER, + TYPE_INTEGER -> ALLOWED_KEYWORDS_NUMBER_SUB_SCHEMA + TYPE_STRING -> ALLOWED_KEYWORDS_STRING_SUB_SCHEMA + else -> ALLOWED_KEYWORDS_SIMPLE_SUB_SCHEMA + } + + validateKeywords(schema, allowedKeywords, path, depth) val enumField = schema.get(ENUM) @@ -600,7 +631,7 @@ internal class JsonSchemaValidator private constructor() { /** * Validates that the names of all fields in the given schema node are present in a collection - * of allowed keywords. + * of allowed keywords. The values associated with the keywords are _not_ validated. * * @param depth The nesting depth of the [schema] node. If this depth is zero, an additional set * of allowed keywords will be included automatically for keywords that are allowed to appear diff --git a/openai-java-core/src/main/kotlin/com/openai/core/StructuredOutputs.kt b/openai-java-core/src/main/kotlin/com/openai/core/StructuredOutputs.kt index 611e6b07..73502c6d 100644 --- a/openai-java-core/src/main/kotlin/com/openai/core/StructuredOutputs.kt +++ b/openai-java-core/src/main/kotlin/com/openai/core/StructuredOutputs.kt @@ -12,6 +12,7 @@ import com.github.victools.jsonschema.generator.OptionPreset import com.github.victools.jsonschema.generator.SchemaGenerator import com.github.victools.jsonschema.generator.SchemaGeneratorConfigBuilder import com.github.victools.jsonschema.module.jackson.JacksonModule +import com.github.victools.jsonschema.module.swagger2.Swagger2Module import com.openai.errors.OpenAIInvalidDataException import com.openai.models.FunctionDefinition import com.openai.models.ResponseFormatJsonSchema @@ -201,6 +202,9 @@ internal fun extractSchema(type: Class<*>): ObjectNode { // Use `JacksonModule` to support the use of Jackson annotations to set property and // class names and descriptions and 
to mark fields with `@JsonIgnore`. .with(JacksonModule()) + // Use `Swagger2Module` to support OpenAPI Swagger 2 `@Schema` annotations to set + // property constraints (e.g., a `"pattern"` constraint for a string property). + .with(Swagger2Module()) configBuilder .forFields() diff --git a/openai-java-core/src/test/kotlin/com/openai/core/StructuredOutputsTest.kt b/openai-java-core/src/test/kotlin/com/openai/core/StructuredOutputsTest.kt index cf5dedd2..6a489d38 100644 --- a/openai-java-core/src/test/kotlin/com/openai/core/StructuredOutputsTest.kt +++ b/openai-java-core/src/test/kotlin/com/openai/core/StructuredOutputsTest.kt @@ -9,6 +9,8 @@ import com.fasterxml.jackson.databind.JsonNode import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.databind.node.ObjectNode import com.openai.errors.OpenAIInvalidDataException +import io.swagger.v3.oas.annotations.media.ArraySchema +import io.swagger.v3.oas.annotations.media.Schema import java.util.Optional import org.assertj.core.api.Assertions.assertThat import org.assertj.core.api.Assertions.assertThatNoException @@ -494,20 +496,16 @@ internal class StructuredOutputsTest { @Test fun schemaTest_unsupportedKeywords() { - // OpenAI lists a set of keywords that are not allowed, but the set is not exhaustive. Check - // that everything named in that set is identified as not allowed, as that is the minimum - // level of validation expected. Check at the root schema and a sub-schema. There is no need - // to match the keywords to their expected schema types or be concerned about the values of - // the keyword fields, which makes testing easier. + // OpenAI lists a set of keywords that are allowed (for non "fine-tuned" models). Check that + // a selection of keywords that are not listed as supported are identified as not allowed. + // Check at the root schema and a sub-schema. 
There is no need to match the keywords to + // their expected schema types or be concerned about the values of the keyword fields, which + // makes testing easier. Supported keywords are tested elsewhere (mostly when applied via + // annotations). val keywordsNotAllowed = listOf( "minLength", "maxLength", - "pattern", - "format", - "minimum", - "maximum", - "multipleOf", "patternProperties", "unevaluatedProperties", "propertyNames", @@ -517,8 +515,6 @@ internal class StructuredOutputsTest { "contains", "minContains", "maxContains", - "minItems", - "maxItems", "uniqueItems", ) val notAllowedUses = keywordsNotAllowed.joinToString(", ") { "\"$it\" : \"\"" } @@ -1359,6 +1355,182 @@ internal class StructuredOutputsTest { assertThat(s2Property).isNotNull } + @Test + fun schemaTest_annotatedWithSchemaStringConstraints() { + @Suppress("unused") + class X(@get:Schema(pattern = "^[a-z]+$", format = "email") val s: String) + + schema = extractSchema(X::class.java) + validator.validate(schema) + + val properties = schema.get("properties") + val stringProperty = properties.get("s") + + assertThat(validator.isValid()).isTrue + assertThat(stringProperty).isNotNull + + assertThat(stringProperty.get("pattern")).isNotNull + assertThat(stringProperty.get("pattern").isTextual).isTrue + assertThat(stringProperty.get("pattern").asText()).isEqualTo("^[a-z]+$") + + assertThat(stringProperty.get("format")).isNotNull + assertThat(stringProperty.get("format").isTextual).isTrue + assertThat(stringProperty.get("format").asText()).isEqualTo("email") + } + + @Test + fun schemaTest_annotatedWithSchemaNumberConstraints() { + @Suppress("unused") + class X(@get:Schema(multipleOf = 5.0, minimum = "10.0", maximum = "55.0") val d: Double) + + schema = extractSchema(X::class.java) + validator.validate(schema) + + val properties = schema.get("properties") + val numberProperty = properties.get("d") + + assertThat(validator.isValid()).isTrue + assertThat(numberProperty).isNotNull + + 
assertThat(numberProperty.get("multipleOf")).isNotNull + assertThat(numberProperty.get("multipleOf").isNumber).isTrue + assertThat(numberProperty.get("multipleOf").asDouble()).isEqualTo(5.0) + + assertThat(numberProperty.get("minimum")).isNotNull + assertThat(numberProperty.get("minimum").isNumber).isTrue + assertThat(numberProperty.get("minimum").asDouble()).isEqualTo(10.0) + + assertThat(numberProperty.get("maximum")).isNotNull + assertThat(numberProperty.get("maximum").isNumber).isTrue + assertThat(numberProperty.get("maximum").asDouble()).isEqualTo(55.0) + } + + @Test + fun schemaTest_annotatedWithSchemaNumberConstraintsExclusive() { + @Suppress("unused") + class X( + @get:Schema( + multipleOf = 5.0, + minimum = "10.0", + exclusiveMinimum = true, + maximum = "55.0", + exclusiveMaximum = true, + ) + val d: Double + ) + + schema = extractSchema(X::class.java) + validator.validate(schema) + + val properties = schema.get("properties") + val numberProperty = properties.get("d") + + assertThat(validator.isValid()).isTrue + assertThat(numberProperty).isNotNull + + assertThat(numberProperty.get("multipleOf")).isNotNull + assertThat(numberProperty.get("multipleOf").isNumber).isTrue + assertThat(numberProperty.get("multipleOf").asDouble()).isEqualTo(5.0) + + // The pairing of `minimum` and `exclusiveMinimum` in the annotation is reduced to a single + // `"exclusiveMinimum"` field in the schema with a numeric value. Same for the maximum. 
+ assertThat(numberProperty.get("exclusiveMinimum")).isNotNull + assertThat(numberProperty.get("exclusiveMinimum").isNumber).isTrue + assertThat(numberProperty.get("exclusiveMinimum").asDouble()).isEqualTo(10.0) + + assertThat(numberProperty.get("exclusiveMaximum")).isNotNull + assertThat(numberProperty.get("exclusiveMaximum").isNumber).isTrue + assertThat(numberProperty.get("exclusiveMaximum").asDouble()).isEqualTo(55.0) + } + + @Test + fun schemaTest_annotatedWithSchemaIntegerConstraints() { + @Suppress("unused") + class X(@get:Schema(multipleOf = 5.0, minimum = "10", maximum = "55") val i: Int) + + schema = extractSchema(X::class.java) + validator.validate(schema) + + val properties = schema.get("properties") + val integerProperty = properties.get("i") + + assertThat(validator.isValid()).isTrue + assertThat(integerProperty).isNotNull + + assertThat(integerProperty.get("multipleOf")).isNotNull + assertThat(integerProperty.get("multipleOf").isNumber).isTrue + assertThat(integerProperty.get("multipleOf").asDouble()).isEqualTo(5.0) + + assertThat(integerProperty.get("minimum")).isNotNull + assertThat(integerProperty.get("minimum").isNumber).isTrue + assertThat(integerProperty.get("minimum").asInt()).isEqualTo(10) + + assertThat(integerProperty.get("maximum")).isNotNull + assertThat(integerProperty.get("maximum").isNumber).isTrue + assertThat(integerProperty.get("maximum").asInt()).isEqualTo(55) + } + + @Test + fun schemaTest_annotatedWithSchemaIntegerConstraintsExclusive() { + @Suppress("unused") + class X( + @get:Schema( + multipleOf = 5.0, + minimum = "10", + exclusiveMinimum = true, + maximum = "55", + exclusiveMaximum = true, + ) + val i: Int + ) + + schema = extractSchema(X::class.java) + validator.validate(schema) + + val properties = schema.get("properties") + val integerProperty = properties.get("i") + + assertThat(validator.isValid()).isTrue + assertThat(integerProperty).isNotNull + + assertThat(integerProperty.get("multipleOf")).isNotNull + 
assertThat(integerProperty.get("multipleOf").isNumber).isTrue + assertThat(integerProperty.get("multipleOf").asDouble()).isEqualTo(5.0) + + // The pairing of `minimum` and `exclusiveMinimum` in the annotation is reduced to a single + // `"exclusiveMinimum"` field in the schema with a numeric value. Same for the maximum. + assertThat(integerProperty.get("exclusiveMinimum")).isNotNull + assertThat(integerProperty.get("exclusiveMinimum").isNumber).isTrue + assertThat(integerProperty.get("exclusiveMinimum").asInt()).isEqualTo(10) + + assertThat(integerProperty.get("exclusiveMaximum")).isNotNull + assertThat(integerProperty.get("exclusiveMaximum").isNumber).isTrue + assertThat(integerProperty.get("exclusiveMaximum").asInt()).isEqualTo(55) + } + + @Test + fun schemaTest_annotatedWithArraySchemaArrayConstraints() { + @Suppress("unused") + class X(@get:ArraySchema(minItems = 11, maxItems = 42) val a: List) + + schema = extractSchema(X::class.java) + validator.validate(schema) + + val properties = schema.get("properties") + val arrayProperty = properties.get("a") + + assertThat(validator.isValid()).isTrue + assertThat(arrayProperty).isNotNull + + assertThat(arrayProperty.get("minItems")).isNotNull + assertThat(arrayProperty.get("minItems").isInt).isTrue + assertThat(arrayProperty.get("minItems").asInt()).isEqualTo(11) + + assertThat(arrayProperty.get("maxItems")).isNotNull + assertThat(arrayProperty.get("maxItems").isInt).isTrue + assertThat(arrayProperty.get("maxItems").asInt()).isEqualTo(42) + } + @Test fun schemaTest_emptyDefinitions() { // Be lenient about empty definitions. 
diff --git a/openai-java-example/src/main/java/com/openai/example/StructuredOutputsExample.java b/openai-java-example/src/main/java/com/openai/example/StructuredOutputsExample.java index b4af9999..9f241ffc 100644 --- a/openai-java-example/src/main/java/com/openai/example/StructuredOutputsExample.java +++ b/openai-java-example/src/main/java/com/openai/example/StructuredOutputsExample.java @@ -7,6 +7,8 @@ import com.openai.models.ChatModel; import com.openai.models.chat.completions.ChatCompletionCreateParams; import com.openai.models.chat.completions.StructuredChatCompletionCreateParams; +import io.swagger.v3.oas.annotations.media.ArraySchema; +import io.swagger.v3.oas.annotations.media.Schema; import java.util.List; public final class StructuredOutputsExample { @@ -32,6 +34,7 @@ public static class Book { public Person author; @JsonPropertyDescription("The year in which the book was first published.") + @Schema(minimum = "1500") public int publicationYear; public String genre; @@ -46,6 +49,7 @@ public String toString() { } public static class BookList { + @ArraySchema(maxItems = 100) public List books; }