Data Dictionary Formats

Feature Croissant CSVW Data Package JSON Schema PostgreSQL XML Schema
Preamble
{
  "@type": "sc:Dataset",
  "conformsTo":
"http://mlcommons.org/croissant/1.0",
  ⋯▽
}
{
  "@context":
    "http://www.w3.org/ns/csvw",
  ⋯▽
}
multiple tables
{
  "@context":
    "http://www.w3.org/ns/csvw",
  "tables": [{⋯▽}, ]
}
{
  "id": "𝓍",
  "name": "𝓎",
  ⋯▽
}
{
  "type": "object",
  ⋯▽ 
}
<?xml version="1.0" encoding="UTF-8" ?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
  ⋯▽
</xs:schema>
Title
"name": "𝓍"
"dc:title": "𝓍"
multilingual
"dc:title": [
  {
    "@value": "𝓍",
    "@language": "en"
  },
  {
    "@value": "𝓎",
    "@language": "fr"
  }
]
"title": "𝓍"
"title": "𝓍"
Description
"description": "𝓍"
"description": "𝓍"
in CommonMark format
"description": "𝓍"
comment on table "𝑻"
is '𝓍';
<xs:annotation>
  <xs:documentation>
    𝓍
  </xs:documentation>
</xs:annotation>
multilingual
<xs:annotation>
  <xs:documentation xml:lang="en">
    𝓍
  </xs:documentation>
  <xs:documentation xml:lang="fr">
    𝓎
  </xs:documentation>
</xs:annotation>
Data source
"distribution": [
  {
    "@type": "cr:FileObject",
    "@id": "𝑭",
    "contentUrl": "https://𝓍"
  }, 
]
link from field
defined at the field level below
"source": {
  "fileSet": {"@id": "𝑭"},
  "extract": {
    "column": "𝓍"
  }
}
"url": "https://𝓍"
"url": "𝓍.csv"
defined at the resources level below
"path": "𝓍.csv"
"path": ["𝓍.csv", "𝓎.csv"]
copy "𝑻" from '𝓍.csv'
(format csv, header);
License
"license": "https://𝓍"
"dc:license": {
  "@id": "https://𝓍"
}
"licenses": [
  {
    "name": "𝓍",
    "path": "https://𝓎",
    "title": "𝓏"
  }, 
]

Table definition
"recordSet": [
  {
    "@type": "cr:RecordSet",
    "@id": "𝑻",
    ⋯▽
  }, 
]
"tableSchema": {⋯▽},
"resources": [
  {
    "name": "𝓍",
    "schema": {⋯▽},
    
  }, 
]
create table "𝑻" ⋯▽;
Primary key
"key": {"@id": "𝑻/𝓍"}
"primaryKey": "𝓍"
"primaryKey": ["𝓍", "𝓎"]
"primaryKey": "𝓍"
"primaryKey": ["𝓍", "𝓎"]
defined after fields
primary key ("𝓍")
primary key ("𝓍", "𝓎")
Foreign keys
defined at the field level below
"references": {
  "@id": "𝓎/𝓏"
},
"foreignKeys": [{
  "columnReference": ["𝓍"],
  "reference": {
    "resource": "https://𝓎",
    "columnReference": ["𝓏"]
  }
}]
"foreignKeys": [{
  "fields": ["𝓍"],
  "reference": {
    "resource": "𝓎",
    "fields": ["𝓏"]
  }
}]
defined after fields
foreign key ("𝓍")
references "𝓎"("𝓏")

Fields and names
"field": [
  {
    "@type": "cr:Field",
    "@id": "𝑻/𝒇",
    ⋯▽
  },
  
]
"columns": [
  {"name": "𝒇", ⋯▽},
  
]
"fields": [
  {"name": "𝒇", ⋯▽},
  
]
"type": "object",
"properties": {
  "𝒇": {⋯▽},
  
},
"required" : [
  "𝒇", 
]
(
  "𝒇" ⋯▽,
  
)
<xs:element name="𝒇" />
Field title
"name": "𝓍"
"titles": "𝓍"
"titles": ["𝓍", "𝓎"]
multilingual
"titles": {
  "en": "𝓍",
  "fr": ["𝓎", "𝓏"]
}
"title": "𝓍"
"title": "𝓍"
Field description
"description": "𝓍"
"dc:description": "𝓍"
"description": "𝓍"
"description": "𝓍"
as a separate statement
comment on column "𝑻"."𝒇"
is '𝓍';
<xs:element name="𝒇">
  <xs:annotation>
    <xs:documentation>
      𝓍
    </xs:documentation>
  </xs:annotation>
</xs:element>
multilingual
<xs:element name="𝒇">
  <xs:annotation>
    <xs:documentation xml:lang="en">
      𝓍
    </xs:documentation>
    <xs:documentation xml:lang="fr">
      𝓎
    </xs:documentation>
  </xs:annotation>
</xs:element>
Text field
"dataType": "sc:Text"
"datatype": "string"
"type": "string"
"type": "string"
text
<xs:element  type="xs:string"/>
Integer field
"dataType": "sc:Integer"
"datatype": "integer"
"type": "integer"
"type": "integer"
integer
<xs:element  type="xs:integer"/>
Numeric field
"dataType": "sc:Float"
"datatype": "number"
"type": "number"
"type": "number"
numeric
<xs:element  type="xs:decimal"/>
Boolean field
"dataType": "sc:Boolean"
"datatype": "boolean"
"type": "boolean"
"type": "boolean"
boolean
<xs:element  type="xs:boolean"/>
ISO Date field
"dataType": "sc:Date"
"datatype": {
  "base": "date",
  "format": "yyyy-M-d"
}
"type": "date"
"type": "string",
"format": "date"
date
<xs:element  type="xs:date"/>
Null allowed
default
default
"type": [𝒕, "null"]
default
<xs:element  nillable="true"/>
Null disallowed
"required": true
"constraints": {
  "required": true
}
default
not null
default
Multiple values
"repeated": true
"type": "array",
"items": {"type": 𝒕}
𝒕[]
<xs:element  maxOccurs="unbounded"/>

Controlled list field
inline data + references
defined at the recordSet level
{
  "@type": "cr:RecordSet",
  "@id": "𝒕",
  "key": {"@id": "𝒕/𝒌"},
  "field": [
    {
      "@type": "cr:Field",
      "@id": "𝒕/𝒌",
      "dataType": "𝒌ₜ"
    },
    {
      "@type": "cr:Field",
      "@id": "𝒕/𝒗",
      "dataType": "𝒗ₜ"
    }
  ],
  "data": [
    {"𝒕/𝒌": 𝓍, "𝒕/𝒗": 𝑿},
    {"𝒕/𝒌": 𝓎, "𝒕/𝒗": 𝒀},
    {"𝒕/𝒌": 𝓏, "𝒕/𝒗": 𝒁}
  ]
},
{
  "@type": "cr:RecordSet",
  "field": [
    {
      "@type": "cr:Field",
      "dataType": "𝒌ₜ",
      "references": {
        "@id": "𝒕/𝒌"
      }
    }
  ]
}
"categories": [𝓍, 𝓎, 𝓏]
labels, enum constraints
"categories": [
  {"value": 𝓍, "label": "𝑿"},
  {"value": 𝓎, "label": "𝒀"},
  {"value": 𝓏, "label": "𝒁"}
]
"constraints": {
  "enum": [𝓍, 𝓎, 𝓏]
}
"enum": [𝓍, 𝓎, 𝓏]
defined after fields
check ("𝑻"."𝒇" in (𝓍, 𝓎, 𝓏))
enumeration types
<xs:simpleType name="𝒕">
  <xs:restriction base="𝒃">
    <xs:enumeration value="𝓍">
      <xs:annotation>
        <xs:documentation>
          𝑿
        </xs:documentation>
      </xs:annotation>
    </xs:enumeration>
    <xs:enumeration value="𝓎">
      <xs:annotation>
        <xs:documentation>
          𝒀
        </xs:documentation>
      </xs:annotation>
    </xs:enumeration>
    <xs:enumeration value="𝓏">
      <xs:annotation>
        <xs:documentation>
          𝒁
        </xs:documentation>
      </xs:annotation>
    </xs:enumeration>
  </xs:restriction>
</xs:simpleType>
<xs:element  type="𝒕"/>
multilingual labels left as an exercise for the reader
Length limits
"minLength": 𝓍,
"maxLength": 𝓎
"constraints": {
  "minLength": 𝓍,
  "maxLength": 𝓎
}
"minLength": 𝓍,
"maxLength": 𝓎
check (char_length("𝑻"."𝒇")
between 𝓍 and 𝓎)
<xs:simpleType name="𝒕">
  <xs:restriction base="xs:string">
    <xs:minLength value="𝓍"/>
    <xs:maxLength value="𝓎"/>
  </xs:restriction>
</xs:simpleType>
<xs:element  type="𝒕"/>
Regular expression
"constraints": {
  "pattern": 𝓍
}
"pattern": "𝓍"
check ("𝑻"."𝒇" ~ '𝓍')
<xs:simpleType name="𝒕">
  <xs:restriction base="xs:string">
    <xs:pattern value="𝓍"/>
  </xs:restriction>
</xs:simpleType>
<xs:element  type="𝒕"/>
Range limits
"minimum": 𝓍,
"maximumExclusive": 𝓎
"constraints": {
  "minimum": 𝓍,
  "exclusiveMaximum": 𝓎
}
"minimum": 𝓍,
"exclusiveMaximum": 𝓎
check (𝓍 <= "𝑻"."𝒇" and
"𝑻"."𝒇" < 𝓎)
<xs:simpleType name="𝒕">
  <xs:restriction base="𝒃">
    <xs:minInclusive value="𝓍"/>
    <xs:maxExclusive value="𝓎"/>
  </xs:restriction>
</xs:simpleType>
<xs:element  type="𝒕"/>
Field examples
defined at the RecordSet level
"examples": [
  {"𝑻/𝓍": 𝑿₁, "𝑻/𝓎": 𝒀₁},
  {"𝑻/𝓍": 𝑿₂, "𝑻/𝓎": 𝒀₂}
]
"example": 𝓍
"examples": [𝓍]
"examples": [𝓍, 𝓎]