Data Formats¶
Glazing stores every dataset in JSON Lines format. Each line of a dataset's .jsonl file is a single JSON object that conforms to that dataset's schema (shown below) and is validated with Pydantic v2 models. These files are converted from each dataset's native format.
Data Location¶
Default locations after running `glazing init`:
~/.local/share/glazing/
├── raw/ # Original format
│ ├── verbnet-3.4/
│ ├── propbank-frames/
│ ├── wn31-dict/
│ └── framenet_v17/
└── converted/ # JSON Lines
  ├── verbnet.jsonl
  ├── propbank.jsonl
  ├── wordnet.jsonl
  └── framenet.jsonl
Dataset Schemas¶
VerbNet¶
JSON Schema:
{
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"required": ["id", "members", "themroles", "frames", "subclasses"],
"properties": {
"id": {
"type": "string",
"pattern": "^[a-z_]+-\\d+(\\.\\d+)*(-\\d+)?$",
"description": "VerbNet class ID (e.g., 'give-13.1')"
},
"members": {
"type": "array",
"items": {
"type": "object",
"required": ["name", "verbnet_key"],
"properties": {
"name": {"type": "string"},
"verbnet_key": {
"type": "string",
"pattern": "^[a-z][a-z0-9_\\-\\.\\s]*#\\d+$"
},
"framenet_mappings": {"type": "array", "items": {"type": "object"}},
"propbank_mappings": {"type": "array", "items": {"type": "object"}},
"wordnet_mappings": {
"type": "array",
"items": {
"type": "object",
"properties": {
"sense_key": {"type": ["string", "null"]},
"synset_offset": {"type": ["string", "null"]},
"lemma": {"type": "string"},
"pos": {"type": "string"},
"sense_number": {"type": ["integer", "null"]}
}
}
},
"features": {"type": "object"},
"mapping_metadata": {"type": ["object", "null"]},
"inherited_from_class": {"type": ["string", "null"]}
}
}
},
"themroles": {
"type": "array",
"items": {
"type": "object",
"required": ["type"],
"properties": {
"type": {"type": "string"},
"sel_restrictions": {
"type": "object",
"properties": {
"logic": {"type": ["string", "null"]},
"restrictions": {"type": "array"}
}
}
}
}
},
"frames": {
"type": "array",
"items": {
"type": "object",
"required": ["description", "examples", "syntax", "semantics"],
"properties": {
"description": {
"type": "object",
"properties": {
"description_number": {"type": "string"},
"primary": {"type": "string"},
"secondary": {"type": "string"}
}
},
"examples": {
"type": "array",
"items": {
"type": "object",
"properties": {"text": {"type": "string"}}
}
},
"syntax": {
"type": "object",
"properties": {
"elements": {
"type": "array",
"items": {
"type": "object",
"properties": {
"pos": {"type": "string"},
"value": {"type": ["string", "null"]},
"synrestrs": {"type": "array"},
"selrestrs": {"type": "array"}
}
}
}
}
},
"semantics": {
"type": "object",
"properties": {
"predicates": {
"type": "array",
"items": {
"type": "object",
"properties": {
"value": {"type": "string"},
"args": {"type": "array"},
"negated": {"type": "boolean"}
}
}
}
}
}
}
}
},
"subclasses": {"type": "array"},
"parent_class": {"type": ["string", "null"]}
}
}
Example Entry:
{
"id": "give-13.1-1",
"members": [
{
"name": "give",
"verbnet_key": "give#3",
"framenet_mappings": [],
"propbank_mappings": [],
"wordnet_mappings": [
{"sense_key": "give%2:40:03::", "synset_offset": null, "lemma": "give", "pos": "v", "sense_number": null},
{"sense_key": "give%2:40:00::", "synset_offset": null, "lemma": "give", "pos": "v", "sense_number": null}
],
"features": {},
"mapping_metadata": null,
"inherited_from_class": null
}
],
"themroles": [
{"type": "Agent", "sel_restrictions": {"logic": null, "restrictions": []}},
{"type": "Theme", "sel_restrictions": {"logic": null, "restrictions": []}},
{"type": "Recipient", "sel_restrictions": {"logic": null, "restrictions": []}}
],
"frames": [
{
"description": {
"description_number": "0.0",
"primary": "NP V NP NP",
"secondary": "Basic Transitive"
},
"examples": [{"text": "Carmen handed the pirate the treasure."}],
"syntax": {
"elements": [
{"pos": "NP", "value": "Agent", "synrestrs": [], "selrestrs": []},
{"pos": "VERB", "value": null, "synrestrs": [], "selrestrs": []},
{"pos": "NP", "value": "Recipient", "synrestrs": [], "selrestrs": []},
{"pos": "NP", "value": "Theme", "synrestrs": [], "selrestrs": []}
]
},
"semantics": {
"predicates": [
{"value": "has_possession", "args": [{"type": "Event", "value": "e1"}, {"type": "ThemRole", "value": "Agent"}, {"type": "ThemRole", "value": "Theme"}], "negated": false},
{"value": "transfer", "args": [{"type": "Event", "value": "e3"}, {"type": "ThemRole", "value": "Agent"}, {"type": "ThemRole", "value": "Theme"}, {"type": "ThemRole", "value": "Recipient"}], "negated": false}
]
}
}
],
"subclasses": [],
"parent_class": "give-13.1"
}
PropBank¶
JSON Schema:
{
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"required": ["predicate_lemma", "rolesets"],
"properties": {
"predicate_lemma": {
"type": "string",
"pattern": "^[a-z][a-z0-9_\\-\\.]*$",
"description": "The predicate lemma (e.g., 'give', 'abandon')"
},
"rolesets": {
"type": "array",
"items": {
"type": "object",
"required": ["id", "roles"],
"properties": {
"id": {
"type": "string",
"pattern": "^[a-z][a-z0-9_\\-\\.]+\\.(\\d{2}|LV)$",
"description": "Roleset ID (e.g., 'give.01')"
},
"name": {"type": ["string", "null"]},
"aliases": {
"type": "object",
"properties": {
"alias": {
"type": "array",
"items": {
"type": "object",
"properties": {
"text": {"type": "string"},
"pos": {"type": "string", "enum": ["n", "v", "j", "l", "m"]}
}
}
},
"argalias": {"type": "array"}
}
},
"roles": {
"type": "array",
"items": {
"type": "object",
"required": ["n", "f", "descr"],
"properties": {
"n": {
"type": "string",
"pattern": "^([0-7]|A|M)$",
"description": "Argument number"
},
"f": {
"type": "string",
"description": "Function tag"
},
"descr": {"type": "string"},
"rolelinks": {
"type": "array",
"items": {
"type": "object",
"properties": {
"class_name": {"type": "string"},
"resource": {"type": "string"},
"version": {"type": "string"},
"role": {"type": "string"}
}
}
}
}
}
},
"examples": {
"type": "array",
"items": {
"type": "object",
"required": ["text"],
"properties": {
"name": {"type": ["string", "null"]},
"text": {"type": "string"},
"propbank": {
"type": "object",
"properties": {
"args": {
"type": "array",
"items": {
"type": "object",
"properties": {
"type": {"type": "string"},
"start": {"type": ["integer", "string"]},
"end": {"type": ["integer", "string"]},
"text": {"type": "string"}
}
}
},
"rel": {
"type": "object",
"properties": {
"relloc": {"type": "string"},
"text": {"type": "string"}
}
}
}
}
}
}
}
}
}
},
"notes": {"type": "array", "items": {"type": "string"}}
}
}
Example Entry:
{
"predicate_lemma": "give",
"rolesets": [
{
"id": "give.01",
"name": "transfer",
"aliases": {
"alias": [
{"text": "giving", "pos": "n"},
{"text": "give", "pos": "v"}
],
"argalias": []
},
"roles": [
{
"n": "0",
"f": "PAG",
"descr": "giver",
"rolelinks": [
{"class_name": "give-13.1-1", "resource": "VerbNet", "version": "verbnet3.4", "role": "agent"},
{"class_name": "Giving", "resource": "FrameNet", "version": "1.7", "role": "donor"}
]
},
{
"n": "1",
"f": "PPT",
"descr": "thing given",
"rolelinks": [
{"class_name": "give-13.1-1", "resource": "VerbNet", "version": "verbnet3.4", "role": "theme"},
{"class_name": "Giving", "resource": "FrameNet", "version": "1.7", "role": "theme"}
]
},
{
"n": "2",
"f": "GOL",
"descr": "entity given to",
"rolelinks": [
{"class_name": "give-13.1-1", "resource": "VerbNet", "version": "verbnet3.4", "role": "recipient"},
{"class_name": "Giving", "resource": "FrameNet", "version": "1.7", "role": "recipient"}
]
}
],
"examples": [
{
"name": "give-v: double object",
"text": "The executives gave the chefs a standing ovation.",
"propbank": {
"args": [
{"type": "ARG0", "start": 0, "end": 1, "text": "The executives"},
{"type": "ARG2", "start": 3, "end": 4, "text": "the chefs"},
{"type": "ARG1", "start": 5, "end": 7, "text": "a standing ovation"}
],
"rel": {"relloc": "2", "text": "gave"}
}
}
]
}
]
}
WordNet¶
JSON Schema:
{
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"required": ["offset", "lex_filenum", "lex_filename", "ss_type", "words", "pointers", "gloss"],
"properties": {
"offset": {
"type": "string",
"pattern": "^\\d{8}$",
"description": "8-digit synset identifier"
},
"lex_filenum": {
"type": "integer",
"minimum": 0,
"maximum": 44
},
"lex_filename": {
"type": "string",
"description": "Lexical file name (e.g., 'verb.perception')"
},
"ss_type": {
"type": "string",
"enum": ["n", "v", "a", "r", "s"],
"description": "Synset type"
},
"words": {
"type": "array",
"items": {
"type": "object",
"required": ["lemma", "lex_id"],
"properties": {
"lemma": {"type": "string"},
"lex_id": {
"type": "integer",
"minimum": 0,
"maximum": 15
}
}
}
},
"pointers": {
"type": "array",
"items": {
"type": "object",
"required": ["symbol", "offset", "pos", "source", "target"],
"properties": {
"symbol": {"type": "string"},
"offset": {"type": "string", "pattern": "^\\d{8}$"},
"pos": {"type": "string", "enum": ["n", "v", "a", "r", "s"]},
"source": {"type": "integer", "minimum": 0},
"target": {"type": "integer", "minimum": 0}
}
}
},
"frames": {
"type": "array",
"items": {
"type": "object",
"properties": {
"frame_number": {"type": "integer", "minimum": 1, "maximum": 35},
"word_indices": {"type": "array", "items": {"type": "integer"}}
}
}
},
"gloss": {
"type": "string",
"description": "Definition and examples"
}
}
}
Example Entry:
{
"offset": "02204104",
"lex_filenum": 40,
"lex_filename": "verb.possession",
"ss_type": "v",
"words": [
{"lemma": "give", "lex_id": 0}
],
"pointers": [
{"symbol": ">", "offset": "02208144", "pos": "v", "source": 0, "target": 0},
{"symbol": "@", "offset": "02225243", "pos": "v", "source": 0, "target": 0},
{"symbol": "+", "offset": "10045455", "pos": "n", "source": 1, "target": 2}
],
"frames": [],
"gloss": "transfer possession of something concrete or abstract to somebody; \"I gave her my money\"; \"can you give me lessons?\"; \"She gave the children lots of love and tender loving care\""
}
FrameNet¶
JSON Schema:
{
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"required": ["id", "name", "definition", "frame_elements"],
"properties": {
"id": {
"type": "integer",
"minimum": 1,
"description": "Unique frame identifier"
},
"name": {
"type": "string",
"pattern": "^[A-Z][A-Za-z0-9_]*$",
"description": "Frame name (e.g., 'Giving')"
},
"definition": {
"type": "object",
"properties": {
"raw_text": {"type": "string"},
"plain_text": {"type": "string"},
"annotations": {"type": "array"}
}
},
"frame_elements": {
"type": "array",
"items": {
"type": "object",
"required": ["id", "name", "abbrev", "definition", "core_type", "bg_color", "fg_color"],
"properties": {
"id": {"type": "integer", "minimum": 1},
"name": {
"type": "string",
"pattern": "^[A-Z][A-Za-z0-9_]*$"
},
"abbrev": {
"type": "string",
"pattern": "^[A-Z][A-Za-z0-9]{0,4}$"
},
"definition": {
"type": "object",
"properties": {
"raw_text": {"type": "string"},
"plain_text": {"type": "string"},
"annotations": {"type": "array"}
}
},
"core_type": {
"type": "string",
"enum": ["Core", "Core-Unexpressed", "Peripheral", "Extra-Thematic"]
},
"bg_color": {
"type": "string",
"pattern": "^[0-9A-F]{6}$"
},
"fg_color": {
"type": "string",
"pattern": "^[0-9A-F]{6}$"
},
"requires_fe": {"type": "array", "items": {"type": "string"}},
"excludes_fe": {"type": "array", "items": {"type": "string"}},
"semtype_refs": {"type": "array", "items": {"type": "integer"}}
}
}
},
"lexical_units": {"type": "array"},
"frame_relations": {"type": "array"},
"created_by": {"type": ["string", "null"]},
"created_date": {"type": ["string", "null"], "format": "date-time"}
}
}
Example Entry:
{
"id": 139,
"name": "Giving",
"definition": {
"plain_text": "A Donor transfers a Theme from a Donor to a Recipient. This frame includes only actions that are initiated by the Donor (the one that starts out owning the Theme). Sentences (even metaphorical ones) must meet the following entailments: the Donor first has possession of the Theme. Following the transfer the Donor no longer has the Theme and the Recipient does. Barney gave the beer to Moe. $300 was endowed to the university to build a new performing arts building."
},
"frame_elements": [
{
"id": 1052,
"name": "Donor",
"abbrev": "Donor",
"definition": {
"plain_text": "The person that begins in possession of the Theme and causes it to be in the possession of the Recipient."
},
"core_type": "Core",
"bg_color": "FF0000",
"fg_color": "FFFFFF"
},
{
"id": 1053,
"name": "Recipient",
"abbrev": "Rec",
"definition": {
"plain_text": "The entity that ends up in possession of the Theme."
},
"core_type": "Core",
"bg_color": "0000FF",
"fg_color": "FFFFFF"
},
{
"id": 1054,
"name": "Theme",
"abbrev": "Thm",
"definition": {
"plain_text": "The object that changes ownership."
},
"core_type": "Core",
"bg_color": "9400D3",
"fg_color": "FFFFFF"
}
],
"lexical_units": [],
"frame_relations": [],
"created_by": "MJE",
"created_date": "2001-06-23T08:15:16Z"
}
Source Formats¶
XML Sources¶
VerbNet, PropBank, and FrameNet use XML format:
VerbNet Example (give-13.1.xml):
<!DOCTYPE VNCLASS SYSTEM "vn_class-3.dtd">
<VNCLASS ID="give-13.1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="vn_schema-3.xsd">
<MEMBERS>
<MEMBER fn_mapping="None" grouping="deal.04" name="deal" verbnet_key="deal#2" wn="deal%2:40:01 deal%2:40:02 deal%2:40:07 deal%2:40:06" features=""/>
<MEMBER fn_mapping="None" grouping="lend.02" name="lend" verbnet_key="lend#1" wn="lend%2:40:00" features=""/>
<MEMBER fn_mapping="Giving" grouping="pass.04" name="pass" verbnet_key="pass#3" wn="pass%2:40:00 pass%2:40:01 pass%2:40:13 pass%2:38:04" features=""/>
</MEMBERS>
<THEMROLES>
<THEMROLE type="Agent">
<SELRESTRS logic="or">
<SELRESTR Value="+" type="animate"/>
<SELRESTR Value="+" type="organization"/>
</SELRESTRS>
</THEMROLE>
<THEMROLE type="Theme">
<SELRESTRS/>
</THEMROLE>
<THEMROLE type="Recipient">
<SELRESTRS logic="or">
<SELRESTR Value="+" type="animate"/>
<SELRESTR Value="+" type="organization"/>
</SELRESTRS>
</THEMROLE>
</THEMROLES>
<FRAMES>
<FRAME>
<DESCRIPTION descriptionNumber="0.2" primary="NP V NP PP.recipient" secondary="NP-PP; Recipient-PP" xtag=""/>
<EXAMPLES>
<EXAMPLE>They lent a bicycle to me.</EXAMPLE>
</EXAMPLES>
<SYNTAX>
<NP value="Agent">
<SYNRESTRS/>
</NP>
<VERB/>
<NP value="Theme">
<SYNRESTRS/>
</NP>
<PREP value="to">
<SELRESTRS/>
</PREP>
<!-- ... more syntax elements ... -->
</SYNTAX>
</FRAME>
</FRAMES>
</VNCLASS>
PropBank Example (give.xml):
<?xml version="1.0" encoding="utf-8" standalone="no"?>
<!DOCTYPE frameset PUBLIC "-//PB//PropBank Frame v3.4 Transitional//EN" "http://propbank.org/specification/dtds/v3.4/frameset.dtd">
<frameset>
<predicate lemma="give">
<roleset id="give.01" name="transfer">
<aliases>
<alias pos="n">giving</alias>
<alias pos="v">give</alias>
</aliases>
<roles>
<role descr="giver" f="PAG" n="0">
<rolelinks>
<rolelink class="give-13.1-1" resource="VerbNet" version="verbnet3.4">agent</rolelink>
<rolelink class="Giving" resource="FrameNet" version="1.7">donor</rolelink>
</rolelinks>
</role>
<role descr="thing given" f="PPT" n="1">
<rolelinks>
<rolelink class="give-13.1-1" resource="VerbNet" version="verbnet3.4">theme</rolelink>
<rolelink class="Giving" resource="FrameNet" version="1.7">theme</rolelink>
</rolelinks>
</role>
<role descr="entity given to" f="GOL" n="2">
<rolelinks>
<rolelink class="give-13.1-1" resource="VerbNet" version="verbnet3.4">recipient</rolelink>
<rolelink class="Giving" resource="FrameNet" version="1.7">recipient</rolelink>
</rolelinks>
</role>
</roles>
<example name="give-v: double object" src="">
<text>The executives gave the chefs a standing ovation.</text>
<propbank>
<rel relloc="2">gave</rel>
<arg type="ARG0" start="0" end="1">The executives</arg>
<arg type="ARG2" start="3" end="4">the chefs</arg>
<arg type="ARG1" start="5" end="7">a standing ovation</arg>
</propbank>
</example>
</roleset>
</predicate>
</frameset>
FrameNet Example (Giving.xml):
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<frame cBy="MJE" cDate="06/23/2001 08:15:16 PDT Sat" name="Giving" ID="139" xmlns="http://framenet.icsi.berkeley.edu">
<definition><def-root>A <fen>Donor</fen> transfers a <fen>Theme</fen> from a <fen>Donor</fen> to a <fen>Recipient</fen>.
This frame includes only actions that are initiated by the <fen>Donor</fen> (the one that starts out owning the <fen>Theme</fen>).</def-root></definition>
<FE bgColor="FF0000" fgColor="FFFFFF" coreType="Core" cBy="MJE" cDate="06/23/2001 08:17:21 PDT Sat" abbrev="Donor" name="Donor" ID="1052">
<definition><def-root>The person that begins in possession of the <fen>Theme</fen> and causes it to be in the possession of the <fen>Recipient</fen>.</def-root></definition>
</FE>
<FE bgColor="0000FF" fgColor="FFFFFF" coreType="Core" cBy="MJE" cDate="06/23/2001 08:18:41 PDT Sat" abbrev="Rec" name="Recipient" ID="1053">
<definition><def-root>The entity that ends up in possession of the <fen>Theme</fen>.</def-root></definition>
</FE>
<FE bgColor="9400D3" fgColor="FFFFFF" coreType="Core" cBy="MJE" cDate="06/23/2001 08:19:30 PDT Sat" abbrev="Thm" name="Theme" ID="1054">
<definition><def-root>The object that changes ownership.</def-root></definition>
<semType name="Physical_object" ID="68"/>
</FE>
<!-- ... more frame elements ... -->
</frame>
WordNet Database Format¶
WordNet uses a custom text-based database format (data.verb):
00675490 31 v 01 give 0 001 @ 00674352 v 0000 01 + 14 00 | estimate the duration or outcome of something; "He gave the patient three months to live"; "I gave him a very good chance at success"
00734247 31 v 03 give 2 pay 0 devote 0 002 @ 00630153 v 0000 $ 02348591 v 0000 02 + 15 00 + 21 00 | dedicate; "give thought to"; "give priority to"; "pay attention to"
00750978 32 v 01 give 7 001 @ 00803980 v 0000 01 + 15 00 | allow to have or take; "I give you two minutes to respond"
Each line contains: synset offset, lexical file number, synset type, word count, words with lexical IDs, pointer count, pointers, frame count and frames (verb synsets only, e.g. "01 + 14 00"), and, after the "|" separator, the gloss.