<!--***** ************************************************************* ******-->
<!--***** START OF OLIF HEADER ******-->
<!--***** ************************************************************* ******-->
<!--***** ************************************************************* ******-->
<!--***** ADMINISTRATIVE INFORMATION ******-->
<!--***** ************************************************************* ******-->
<!--***** Version:
* $DateTime: 2002/02/07 10:01:57 $ $Revision: #15 $
* Contents:
* Header elements of the Open Lexicon Interchange Format (OLIF)
* Public Identifier:
* -//OLIF Consortium:2002//ELEMENTS OLIF 2.0: Header//EN
* System Identifier:
* oHeader.mod
* Dependencies (this DTD depends on the presence of):
* oHeaderV.mod
* Contact:
* www.olif.net
* Status:
* Copyright (C) 2002, OLIF Consortium
* Comments:
* - one may consider using XPath for valueDefault
* - use 'note' and 'prop' from 'oKGDC.mod'
******-->
<!--***** ************************************************************* ******-->
<!--***** SUPPLEMENTARY DECLARATIONS ******-->
<!--***** ************************************************************* ******-->
<!-- External parameter entity for the header value declarations. It is
     presumably the source of the %*.pcd; and %*.att; parameter entities
     referenced below, which are not declared in this module (confirm
     against oHeaderV.mod). -->
<!ENTITY % headerValues
    PUBLIC "-//OLIF Consortium:2002//ELEMENTS OLIF 2.0: Header Values//EN"
           "oHeaderV.mod">
%headerValues;
<!--***** ************************************************************* ******-->
<!--***** DECLARATIONS ******-->
<!--***** ************************************************************* ******-->
<!-- NOTE(review): 'locInfo' and 'note' are referenced here but not declared
     in this module, while 'fileDesc' is declared in this module but is not
     referenced by any content model in it. Confirm against the other OLIF
     modules whether this is intended. -->
<!ELEMENT header (
    publStmt?,
    dataCatReg?,
    contentInfo,
    workflowInfo?,
    locInfo?,
    termExtractInfo?,
    (replacements | note | prop)*
)>
<!ATTLIST header
    %creaTool.att;
    %creaToolVersion.att;
    %origFormat.att;
    %adminLang.att;
    %creaDate.att;
    %creaId.att;
>
<!--
  <i>
    <t>e</t>
    <n>header</n>
    <d>The header element groups the data categories that carry information
    about the data that has been encoded (thus, header holds meta-data).</d>
  </i>
-->
<!--***** ************************************************************* ******-->
<!--***** data category registry ******-->
<!--***** ************************************************************* ******-->
<!-- crLinkTypeDCS and orthVariantTypeDCS are declared in this module as
     data category specifications; they are listed here as optional
     particles (in declaration order) so that they can actually occur in a
     document. All particles are optional, so previously valid documents
     remain valid. -->
<!ELEMENT dataCatReg (
    ptOfSpeechDCS?,
    subjFieldDCS?,
    semReadingDCS?,
    crLinkTypeDCS?,
    orthVariantTypeDCS?,
    morphStructDCS?,
    inflectionDCS?,
    aspectDCS?,
    synTypeDCS?,
    synFrameDCS?,
    synStructDCS?,
    semTypeDCS?,
    conceptHierarchyDCS?
)>
<!--
  <i>
    <t>e</t>
    <n>dataCatReg</n>
    <d>The dataCatReg element groups data categories for extensions to
    extensible OLIF data categories (like ptOfSpeech). The idea is that
    whenever a user chooses to make use of a user extension (and for
    example supplies his own tag set for part-of-speech), he explains
    the overall listing of the data categories and values he uses (for
    example via a URL that he puts into the ptOfSpeechDCS element of the
    dataCatReg element). The dataCatReg element contains several data
    category specifications (DCS), each of which is optional.</d>
  </i>
-->
<!--***** ************************************************************* ******-->
<!--***** data category specifications ******-->
<!--***** ************************************************************* ******-->
<!ELEMENT ptOfSpeechDCS %ptOfSpeechDCS.pcd;>
<!ATTLIST ptOfSpeechDCS
    %dcsType.att;
>
<!--
  <i>
    <t>e</t>
    <n>ptOfSpeechDCS</n>
    <d>The ptOfSpeechDCS element (DCS is short for data category
    specification) holds data about a user-extended scheme for describing
    the part-of-speech of OLIF entries. Users can, for example, describe
    their additional part-of-speech tags by means of a URL or by means
    of CDATA sections.
    Example uses:
      <ptOfSpeechDCS DCSType="extension">
        http://www.company.com/nlp/ptOfSpeech/projectX.htm
      </ptOfSpeechDCS>
      <ptOfSpeechDCS DCSType="extension">
        <![CDATA[
          We add the following part of speech tags to the ones
          already defined in OLIF
          ng - nominal group
          vg - verbal group
          pg - prepositional group
        ]]>
      </ptOfSpeechDCS>
    </d>
  </i>
-->
<!ELEMENT subjFieldDCS %subjFieldDCS.pcd;>
<!ATTLIST subjFieldDCS
    %dcsType.att;
>
<!--
  <i>
    <t>e</t>
    <n>subjFieldDCS</n>
    <d>The subjFieldDCS element holds data about a user-extended scheme
    for describing the subject field information of OLIF entries (see the
    comment for the ptOfSpeechDCS element for more information).</d>
  </i>
-->
<!ELEMENT semReadingDCS %semReadingDCS.pcd;>
<!ATTLIST semReadingDCS
    %dcsType.att;
>
<!--
  <i>
    <t>e</t>
    <n>semReadingDCS</n>
    <d>The semReadingDCS element holds data about a user-extended scheme
    for describing the semantic reading information of OLIF entries (see
    the comment for the ptOfSpeechDCS element for more information).</d>
  </i>
-->
<!ELEMENT crLinkTypeDCS %crLinkTypeDCS.pcd;>
<!ATTLIST crLinkTypeDCS
    %dcsType.att;
>
<!--
  <i>
    <t>e</t>
    <n>crLinkTypeDCS</n>
    <d>The crLinkTypeDCS element holds data about a user-extended scheme
    for describing the types of cross-references between OLIF entries (see
    the comment for the ptOfSpeechDCS element for more information).</d>
  </i>
-->
<!ELEMENT orthVariantTypeDCS %orthVariantTypeDCS.pcd;>
<!ATTLIST orthVariantTypeDCS
    %dcsType.att;
>
<!--
  <i>
    <t>e</t>
    <n>orthVariantTypeDCS</n>
    <d>The orthVariantTypeDCS element holds data about a user-extended
    scheme for describing the orthographic variants of OLIF entries (see
    the comment for the ptOfSpeechDCS element for more information).</d>
  </i>
-->
<!ELEMENT morphStructDCS %morphStructDCS.pcd;>
<!ATTLIST morphStructDCS
    %dcsType.att;
>
<!--
  <i>
    <t>e</t>
    <n>morphStructDCS</n>
    <d>The morphStructDCS element holds data about a user-extended scheme
    for describing the internal morphological structure of entry
    strings/designators (see the comment for the ptOfSpeechDCS element
    for more information).</d>
  </i>
-->
<!ELEMENT inflectionDCS %inflectionDCS.pcd;>
<!ATTLIST inflectionDCS
    %dcsType.att;
    %inflectionDCSType.att;
>
<!--
  <i>
    <t>e</t>
    <n>inflectionDCS</n>
    <d>The inflectionDCS element holds data about a user-extended scheme
    for describing the inflection of OLIF entries (see the comment for
    the ptOfSpeechDCS element for more information).</d>
  </i>
-->
<!ELEMENT aspectDCS %aspectDCS.pcd;>
<!ATTLIST aspectDCS
    %dcsType.att;
>
<!--
  <i>
    <t>e</t>
    <n>aspectDCS</n>
    <d>The aspectDCS element holds data about a user-extended scheme for
    describing the aspect of OLIF entries (see the comment for the
    ptOfSpeechDCS element for more information).</d>
  </i>
-->
<!ELEMENT synTypeDCS %synTypeDCS.pcd;>
<!ATTLIST synTypeDCS
    %dcsType.att;
>
<!--
  <i>
    <t>e</t>
    <n>synTypeDCS</n>
    <d>The synTypeDCS element holds data about a user-extended scheme for
    describing the syntactic type of OLIF entries (see the comment for
    the ptOfSpeechDCS element for more information).</d>
  </i>
-->
<!ELEMENT synFrameDCS %synFrameDCS.pcd;>
<!ATTLIST synFrameDCS
    %dcsType.att;
>
<!--
  <i>
    <t>e</t>
    <n>synFrameDCS</n>
    <d>The synFrameDCS element holds data about a user-extended scheme
    for describing the syntactic frames of OLIF entries (see the comment
    for the ptOfSpeechDCS element for more information).</d>
  </i>
-->
<!ELEMENT synStructDCS %synStructDCS.pcd;>
<!ATTLIST synStructDCS
    %dcsType.att;
>
<!--
  <i>
    <t>e</t>
    <n>synStructDCS</n>
    <d>The synStructDCS element holds data about a user-extended scheme
    for describing the syntactic structures of OLIF entries (see the
    comment for the ptOfSpeechDCS element for more information).</d>
  </i>
-->
<!ELEMENT semTypeDCS %semTypeDCS.pcd;>
<!ATTLIST semTypeDCS
    %dcsType.att;
>
<!--
  <i>
    <t>e</t>
    <n>semTypeDCS</n>
    <d>The semTypeDCS element holds data about a user-extended scheme for
    describing the semantic types of OLIF entries (see the comment for
    the ptOfSpeechDCS element for more information).</d>
  </i>
-->
<!ELEMENT conceptHierarchyDCS %conceptHierarchyDCS.pcd;>
<!ATTLIST conceptHierarchyDCS
    %dcsType.att;
>
<!--
  <i>
    <t>e</t>
    <n>conceptHierarchyDCS</n>
    <d>The conceptHierarchyDCS element holds data about a user-extended
    scheme for describing the concept hierarchy/ontology of OLIF entries
    (see the comment for the ptOfSpeechDCS element for more
    information).</d>
  </i>
-->
<!--***** ************************************************************* ******-->
<!--***** information related to encodings of terms ******-->
<!--***** ************************************************************* ******-->
<!ELEMENT contentInfo (
    quotMarkInfo,
    syllabificationMarkInfo?,
    abbrevHandling?,
    langIdUse,
    valueDefaults?
)>
<!--
  <i>
    <t>e</t>
    <n>contentInfo</n>
    <d>The contentInfo element groups data categories related to the
    practice adopted for encoding quotation marks, abbreviations etc.</d>
  </i>
-->
<!ELEMENT quotMarkInfo %quotMarkInfo.pcd;>
<!ATTLIST quotMarkInfo
    %quotMarkRet.att;
    %quotMarkForm.att;
>
<!--
  <i>
    <t>e</t>
    <n>quotMarkInfo</n>
    <d>The quotMarkInfo element holds data about the editorial practice
    adopted with respect to quotation marks.
    Example use: our open quote is '!' and our closing quote is '$'</d>
  </i>
-->
<!ELEMENT syllabificationMarkInfo %syllabificationMarkInfo.pcd;>
<!--
  <i>
    <t>e</t>
    <n>syllabificationMarkInfo</n>
    <d>The syllabificationMarkInfo element holds data about the editorial
    practice adopted with respect to syllabification in the original.
    Example use: we use '*' as marker</d>
  </i>
-->
<!ELEMENT abbrevHandling %abbrevHandling.pcd;>
<!--
  <i>
    <t>e</t>
    <n>abbrevHandling</n>
    <d>The abbrevHandling element holds data about the way abbreviations
    are represented. Two options exist: via the abbrev element or via a
    crossRefer element.
    Example use: we use both the abbrev element,
    and the crossRefer element</d>
  </i>
-->
<!ELEMENT langIdUse %langIdUse.pcd;>
<!--
  <i>
    <t>e</t>
    <n>langIdUse</n>
    <d>The langIdUse element holds data about the way language
    identifiers have been used.
    Possible values:
      region_standard - the region part of a locale (e.g. the CA
                        in FR_CA) has been used even if the term also
                        exists in the unrestricted locale (e.g. French
                        as a whole).
      region_exception - the region part of a locale has been used
                         only if the term does not exist in the
                         unrestricted locale.</d>
  </i>
-->
<!ELEMENT valueDefaults (valDefault)*>
<!--
  <i>
    <t>e</t>
    <n>valueDefaults</n>
    <d>The valueDefaults element groups information about the default
    values for various data categories. Whenever an OLIF entry does not
    specify a value for one of these data categories, the information
    from the valueDefaults element should be applied.</d>
  </i>
-->
<!ELEMENT valDefault %valDefault.pcd;>
<!ATTLIST valDefault
    %valDefaultRefType.att;
    %valDefaultRefName.att;
>
<!--
  <i>
    <t>e</t>
    <n>valDefault</n>
    <d>The valDefault element holds data about the default value for one
    specific data category.
    Example use: The example below shows how to set the default for
    the data category 'product' to the string 'OLIF Converter':
      <valDefault valDefaultRefType="e" valDefaultRefName="product">
        OLIF Converter
      </valDefault>
    </d>
  </i>
-->
<!--***** ************************************************************* ******-->
<!--***** workflow information ******-->
<!--***** ************************************************************* ******-->
<!ELEMENT workflowInfo %workflowInfo.pcd;>
<!--
  <i>
    <t>e</t>
    <n>workflowInfo</n>
    <d>The workflowInfo element holds data about user-specific workflow
    support.
    Example use: to be validated by 31 Dec 2001 at the latest</d>
  </i>
-->
<!--***** ************************************************************* ******-->
<!--***** information from terminology extraction ******-->
<!--***** ************************************************************* ******-->
<!ELEMENT termExtractInfo %termExtractInfo.pcd;>
<!--
  <i>
    <t>e</t>
    <n>termExtractInfo</n>
    <d>The termExtractInfo element holds data that is relevant for
    terminology extraction (e.g. name and size of the corpus to which
    term extraction has been applied).</d>
  </i>
-->
<!--***** ************************************************************* ******-->
<!--***** physical/technical information ******-->
<!--***** ************************************************************* ******-->
<!ELEMENT fileDesc (fileName?, fileId, fileExtent)>
<!--
  <i>
    <t>e</t>
    <n>fileDesc</n>
    <d>The fileDesc element groups data categories relating to physical
    features of the OLIF instance (document).</d>
  </i>
-->
<!ELEMENT fileName %fileName.pcd;>
<!--
  <i>
    <t>e</t>
    <n>fileName</n>
    <d>The fileName element holds data about the name of the OLIF file.
    Example use: olifForAgency14Jan02.xml</d>
  </i>
-->
<!ELEMENT fileId %fileId.pcd;>
<!--
  <i>
    <t>e</t>
    <n>fileId</n>
    <d>The fileId element holds data about a unique identifier (e.g. a
    globally unique identifier) of the OLIF file.
    Example use: 011000358700000683362001E.xml</d>
  </i>
-->
<!ELEMENT fileExtent (conceptCount?, entryCount, termCount, byteCount)>
<!--
  <i>
    <t>e</t>
    <n>fileExtent</n>
    <d>The fileExtent element groups data categories related to counts of
    items (for example the number of entries) in the contents of the OLIF
    instance.</d>
  </i>
-->
<!ELEMENT conceptCount %conceptCount.pcd;>
<!--
  <i>
    <t>e</t>
    <n>conceptCount</n>
    <d>The conceptCount element holds data about the number of concepts
    in the OLIF document.</d>
  </i>
-->
<!ELEMENT entryCount %entryCount.pcd;>
<!--
  <i>
    <t>e</t>
    <n>entryCount</n>
    <d>The entryCount element holds data about the number of entries in
    the OLIF document.</d>
  </i>
-->
<!ELEMENT termCount %termCount.pcd;>
<!--
  <i>
    <t>e</t>
    <n>termCount</n>
    <d>The termCount element holds data about the number of terms in the
    OLIF document. Terms are generally defined as those entries which are
    both not general vocabulary and distinguished from one another by the
    values of the key data categories.</d>
  </i>
-->
<!ELEMENT byteCount %byteCount.pcd;>
<!ATTLIST byteCount
    %byteCountUnit.att;
>
<!--
  <i>
    <t>e</t>
    <n>byteCount</n>
    <d>The byteCount element holds data about the size of the OLIF
    document including its tags, in its representation as a text file
    encoded in the character set mentioned in the encoding attribute of
    the XML declaration. This is useful for calculating media
    requirements or file download times.</d>
  </i>
-->
<!--***** ************************************************************* ******-->
<!--***** administrative information ******-->
<!--***** ************************************************************* ******-->
<!ELEMENT publStmt (distributor, owner, idNo*, availability, date)>
<!--
  <i>
    <t>e</t>
    <n>publStmt</n>
    <d>The publStmt element groups data categories related to the
    distributor and the owner of the OLIF document. The publStmt element
    also gives supplementary information about the OLIF document (e.g.
    copyright protection).</d>
  </i>
-->
<!ELEMENT distributor (name, address*, telephone*, fax*, eAddress*)>
<!ATTLIST distributor
    %distributorType.att;
>
<!--
  <i>
    <t>e</t>
    <n>distributor</n>
    <d>The distributor element holds data about the person or institution
    who distributes the OLIF document.</d>
  </i>
-->
<!ELEMENT address %address.pcd;>
<!--
  <i>
    <t>e</t>
    <n>address</n>
    <d>The address element holds data about a postal address of the
    distributor or owner.</d>
  </i>
-->
<!ELEMENT telephone %telephone.pcd;>
<!--
  <i>
    <t>e</t>
    <n>telephone</n>
    <d>The telephone element holds data about the telephone number of the
    person or institution who distributes or owns the OLIF file
    (preferably in a format conformant to ITU-T/CCITT Recommendation
    E.123).</d>
  </i>
-->
<!ELEMENT fax %fax.pcd;>
<!--
  <i>
    <t>e</t>
    <n>fax</n>
    <d>The fax element holds data about the fax number of the person or
    institution who distributes or owns the OLIF file (preferably in a
    format conformant to ITU-T/CCITT Recommendation E.123).</d>
  </i>
-->
<!ELEMENT eAddress %eAddress.pcd;>
<!ATTLIST eAddress
    %eAddressType.att;
>
<!--
  <i>
    <t>e</t>
    <n>eAddress</n>
    <d>The eAddress element holds data about an electronic address of the
    person or institution who distributes or owns the OLIF file. Note
    that more than one occurrence of this tag can appear, so that
    multiple addresses (possibly of different types) can be included.</d>
  </i>
-->
<!ELEMENT availability %availability.pcd;>
<!ATTLIST availability
    %region.att;
    %pubStatus.att;
>
<!--
  <i>
    <t>e</t>
    <n>availability</n>
    <d>The availability element holds data about the availability of an
    OLIF file, for example any restrictions on its use or distribution,
    its copyright status, etc. A company may use 'Available upon written
    agreement' to indicate that the OLIF file may not be freely
    redistributed.</d>
  </i>
-->
<!ELEMENT idNo %idNo.pcd;>
<!ATTLIST idNo
    %idNoType.att;
>
<!--
  <i>
    <t>e</t>
    <n>idNo</n>
    <d>The idNo element holds data about a number (e.g. ISBN) used to
    identify an OLIF document.</d>
  </i>
-->
<!ELEMENT date %date.pcd;>
<!ATTLIST date
    %dateValue.att;
>
<!--
  <i>
    <t>e</t>
    <n>date</n>
    <d>The date element holds data about a date. Its value must be in
    ASCII, in the format YYYYMMDDThhmmssZ (e.g. 19970811T133402Z for
    August 11th 1997 at 1:34 pm and 2 seconds). This is one of the
    options described in ISO 8601:1988. The value is preferably given in
    Coordinated Universal Time (UTC; as indicated by the terminal Z). The
    DateValue attribute can be used to specify the date in an arbitrary
    format.</d>
  </i>
-->
<!ELEMENT owner (name, address*, telephone*, fax*, eAddress*)>
<!ATTLIST owner
    %ownerType.att;
>
<!--
  <i>
    <t>e</t>
    <n>owner</n>
    <d>The owner element holds data about the person or institution that
    owns the OLIF document.</d>
  </i>
-->
<!--***** ************************************************************* ******-->
<!--***** data compression ******-->
<!--***** ************************************************************* ******-->
<!ELEMENT replacements (mapping+)>
<!--
  <i>
    <t>e</t>
    <n>replacements</n>
    <d>The replacements element groups data categories for string
    replacements that should be applied to the document. The replacements
    element helps to compress data and might for example specify one
    value for the date element of a list of 1000 elements.</d>
  </i>
-->
<!ELEMENT mapping (
    mappingValue,
    mappingTarget+
)>
<!--
  <i>
    <t>e</t>
    <n>mapping</n>
    <d>The mapping element groups a mappingValue and one or more
    mappingTarget elements. The mappingValue should be used for the
    items designated by the mappingTarget elements.</d>
  </i>
-->
<!ELEMENT mappingValue %mappingValue.pcd;>
<!--
  <i>
    <t>e</t>
    <n>mappingValue</n>
    <d>The mappingValue element holds data about a replacement string
    that is used in a mapping.</d>
  </i>
-->
<!ELEMENT mappingTarget %mappingTarget.pcd;>
<!--
  <i>
    <t>e</t>
    <n>mappingTarget</n>
    <d>The mappingTarget element holds data about an item to which a
    replacement should be applied.</d>
  </i>
-->
<!--***** ************************************************************* ******-->
<!--***** miscellaneous ******-->
<!--***** ************************************************************* ******-->
<!ELEMENT name %name.pcd;>
<!--
  <i>
    <t>e</t>
    <n>name</n>
    <d>The name element holds data about a name (e.g. of a distributor
    or owner).</d>
  </i>
-->
<!ELEMENT prop %prop.pcd;>
<!ATTLIST prop
    %propType.att;
    %propLang.att;
>
<!--
  <i>
    <t>e</t>
    <n>prop</n>
    <d>The prop element holds data about non-standard (proprietary)
    information in an OLIF document. It may be used for communicating
    tool-specific information.</d>
  </i>
-->
<!--***** ************************************************************* ******-->
<!--***** END OF OLIF HEADER ******-->
<!--***** ************************************************************* ******-->