Solr 的 schema.xml 配置文件详解

访客 阅读:232 2021-03-31 21:34:41 评论:0

schema.xml 是做什么的?它定义了索引中包含哪些字段(field)、每个字段使用的类型(fieldType),以及这些字段在建立索引和查询时如何被处理。



<!-- string type: backed by solr.StrField -->
<fieldType name="string" class="solr.StrField" sortMissingLast="true"/>

<!-- boolean type: "true" or "false" -->
<fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>

<!-- The optional sortMissingLast and sortMissingFirst attributes are
     currently supported on types that are sorted internally as strings
     and on numeric types.
     This includes "string", "boolean", and, as of 3.5 (and 4.x),
     int, float, long, date, double, including the "Trie" variants.
     - If sortMissingLast="true", then a sort on this field will cause documents
       without the field to come after documents with the field,
       regardless of the requested sort order (asc or desc).
     - If sortMissingFirst="true", then a sort on this field will cause documents
       without the field to come before documents with the field,
       regardless of the requested sort order.
     - If sortMissingLast="false" and sortMissingFirst="false" (the default),
       then default lucene sorting will be used which places docs without the
       field first in an ascending sort and last in a descending sort.
-->

<!--
     Default numeric field types. For faster range queries, consider the
     tint/tfloat/tlong/tdouble types.

     These fields support doc values, but they require the field to be
     single-valued and either be required or have a default value.
-->
<fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0"/>
<fieldType name="float" class="solr.TrieFloatField" precisionStep="0" positionIncrementGap="0"/>
<fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
<fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" positionIncrementGap="0"/>

<!--
     Numeric field types that index each value at various levels of precision
     to accelerate range queries when the number of values between the range
     endpoints is large. See the javadoc for NumericRangeQuery for internal
     implementation details.

     Smaller precisionStep values (specified in bits) will lead to more tokens
     indexed per value, slightly larger index size, and faster range queries.
     A precisionStep of 0 disables indexing at different precision levels.
-->
<fieldType name="tint" class="solr.TrieIntField" precisionStep="8" positionIncrementGap="0"/>
<fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" positionIncrementGap="0"/>
<fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" positionIncrementGap="0"/>
<fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" positionIncrementGap="0"/>

<!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and
     is a more restricted form of the canonical representation of dateTime
     http://www.w3.org/TR/xmlschema-2/#dateTime
     The trailing "Z" designates UTC time and is mandatory.
     Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
     All other components are mandatory.

     Expressions can also be used to denote calculations that should be
     performed relative to "NOW" to determine the value, ie...

           NOW/HOUR
              ... Round to the start of the current hour
           NOW-1DAY
              ... Exactly 1 day prior to now
           NOW/DAY+6MONTHS+3DAYS
              ... 6 months and 3 days in the future from the start of
                  the current day

     Consult the DateField javadocs for more information.

     Note: For faster range queries, consider the tdate type
-->
<fieldType name="date" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0"/>

<!-- A Trie based date field for faster date range queries and date faceting. -->
<fieldType name="tdate" class="solr.TrieDateField" precisionStep="6" positionIncrementGap="0"/>

<!-- Binary data type. The data should be sent/retrieved as Base64 encoded Strings -->
<fieldType name="binary" class="solr.BinaryField"/>

<!--
     Note:
     These should only be used for compatibility with existing indexes
     (created with lucene or older Solr versions).
     Use Trie based fields instead. As of Solr 3.5 and 4.x, Trie based fields
     support sortMissingFirst/Last

     Plain numeric field types that store and index the text
     value verbatim (and hence don't correctly support range queries, since the
     lexicographic ordering isn't equal to the numeric ordering)

     NOTE: These field types are deprecated and will be completely removed in Solr 5.0!
-->
<fieldType name="pint" class="solr.IntField"/>
<fieldType name="plong" class="solr.LongField"/>
<fieldType name="pfloat" class="solr.FloatField"/>
<fieldType name="pdouble" class="solr.DoubleField"/>
<fieldType name="pdate" class="solr.DateField" sortMissingLast="true"/>

<!-- The "RandomSortField" is not used to store or search any
     data.  You can declare fields of this type in your schema
     to generate pseudo-random orderings of your docs for sorting
     or function purposes.  The ordering is generated based on the field
     name and the version of the index. As long as the index version
     remains unchanged, and the same field name is reused,
     the ordering of the docs will be consistent.
     If you want different pseudo-random orderings of documents,
     for the same version of the index, use a dynamicField and
     change the field name in the request.
-->
<fieldType name="random" class="solr.RandomSortField" indexed="true"/>










<!-- Version field, declared with the "long" type. -->
<field name="_version_" type="long" indexed="true" stored="true"/>

<!-- points to the root document of a block of nested documents. Required for nested
     document support, may be removed otherwise
-->
<field name="_root_" type="string" indexed="true" stored="false"/>

<!-- Only remove the "id" field if you have a very good reason to. While not strictly
     required, it is highly recommended. A <uniqueKey> is present in almost all Solr
     installations. See the <uniqueKey> declaration below where <uniqueKey> is set to "id".
-->
<field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false"/>

<field name="sku" type="text_en_splitting_tight" indexed="true" stored="true" omitNorms="true"/>
<field name="name" type="text_general" indexed="true" stored="true"/>
<field name="manu" type="text_general" indexed="true" stored="true" omitNorms="true"/>
<field name="cat" type="string" indexed="true" stored="true" multiValued="true"/>
<field name="features" type="text_general" indexed="true" stored="true" multiValued="true"/>
<field name="includes" type="text_general" indexed="true" stored="true" termVectors="true" termPositions="true" termOffsets="true"/>

<field name="weight" type="float" indexed="true" stored="true"/>
<field name="price" type="float" indexed="true" stored="true"/>
<field name="popularity" type="int" indexed="true" stored="true"/>
<field name="inStock" type="boolean" indexed="true" stored="true"/>

<field name="store" type="location" indexed="true" stored="true"/>

<!-- Common metadata fields, named specifically to match up with
     SolrCell metadata when parsing rich documents such as Word, PDF.
     Some fields are multiValued only because Tika currently may return
     multiple values for them. Some metadata is parsed from the documents,
     but there are some which come from the client context:
       "content_type": From the HTTP headers of incoming stream
       "resourcename": From SolrCell request param resource.name
-->
<field name="title" type="text_general" indexed="true" stored="true" multiValued="true"/>
<field name="subject" type="text_general" indexed="true" stored="true"/>
<field name="description" type="text_general" indexed="true" stored="true"/>
<field name="comments" type="text_general" indexed="true" stored="true"/>
<field name="author" type="text_general" indexed="true" stored="true"/>
<field name="keywords" type="text_general" indexed="true" stored="true"/>
<field name="category" type="text_general" indexed="true" stored="true"/>
<field name="resourcename" type="text_general" indexed="true" stored="true"/>
<field name="url" type="text_general" indexed="true" stored="true"/>
<field name="content_type" type="string" indexed="true" stored="true" multiValued="true"/>
<field name="last_modified" type="date" indexed="true" stored="true"/>
<field name="links" type="string" indexed="true" stored="true" multiValued="true"/>

<!-- Main body of document extracted by SolrCell.
     NOTE: This field is not indexed by default, since it is also copied to "text"
     using copyField below. This is to save space. Use this field for returning and
     highlighting document content. Use the "text" field to search the content. -->
<field name="content" type="text_general" indexed="false" stored="true" multiValued="true"/>

<!-- catchall field, containing all other searchable text fields (implemented
     via copyField further on in this schema) -->
<field name="text" type="text_general" indexed="true" stored="false" multiValued="true"/>

<!-- catchall text field that indexes tokens both normally and in reverse for efficient
     leading wildcard queries. -->
<field name="text_rev" type="text_general_rev" indexed="true" stored="false" multiValued="true"/>

<!-- non-tokenized version of manufacturer to make it easier to sort or group
     results by manufacturer.  copied from "manu" via copyField -->
<field name="manu_exact" type="string" indexed="true" stored="false"/>

<field name="payloads" type="payloads" indexed="true" stored="true"/>




indexed:是否被索引。只有设置为 true 的字段才能被搜索、排序和分面统计(searchable, sortable, facetable)。




<!-- Product fields copied into the catch-all "text" field. -->
<copyField source="cat" dest="text"/>
<copyField source="name" dest="text"/>
<copyField source="manu" dest="text"/>
<copyField source="features" dest="text"/>
<copyField source="includes" dest="text"/>

<!-- Manufacturer is additionally copied to "manu_exact". -->
<copyField source="manu" dest="manu_exact"/>

<!-- Price is copied to the currency-enabled "price_c" field (default USD). -->
<copyField source="price" dest="price_c"/>

<!-- SolrCell text fields that are searched by default through the catch-all field. -->
<copyField source="title" dest="text"/>
<copyField source="author" dest="text"/>
<copyField source="description" dest="text"/>
<copyField source="keywords" dest="text"/>
<copyField source="content" dest="text"/>
<copyField source="content_type" dest="text"/>
<copyField source="resourcename" dest="text"/>
<copyField source="url" dest="text"/>

<!-- String copy of the author, intended for faceting. -->
<copyField source="author" dest="author_s"/>







<!-- Suffix-based dynamic fields. In each pair below, the second pattern
     is the multiValued variant of the first. -->
<dynamicField name="*_i" type="int" indexed="true" stored="true"/>
<dynamicField name="*_is" type="int" indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_s" type="string" indexed="true" stored="true"/>
<dynamicField name="*_ss" type="string" indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_l" type="long" indexed="true" stored="true"/>
<dynamicField name="*_ls" type="long" indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_t" type="text_general" indexed="true" stored="true"/>
<dynamicField name="*_txt" type="text_general" indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_en" type="text_en" indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_b" type="boolean" indexed="true" stored="true"/>
<dynamicField name="*_bs" type="boolean" indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_f" type="float" indexed="true" stored="true"/>
<dynamicField name="*_fs" type="float" indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_d" type="double" indexed="true" stored="true"/>
<dynamicField name="*_ds" type="double" indexed="true" stored="true" multiValued="true"/>

<!-- Holds the lat and lon components indexed for the "location" FieldType. -->
<dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false"/>

<dynamicField name="*_dt" type="date" indexed="true" stored="true"/>
<dynamicField name="*_dts" type="date" indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_p" type="location" indexed="true" stored="true"/>

<!-- Trie-coded variants, for faster range queries. -->
<dynamicField name="*_ti" type="tint" indexed="true" stored="true"/>
<dynamicField name="*_tl" type="tlong" indexed="true" stored="true"/>
<dynamicField name="*_tf" type="tfloat" indexed="true" stored="true"/>
<dynamicField name="*_td" type="tdouble" indexed="true" stored="true"/>
<dynamicField name="*_tdt" type="tdate" indexed="true" stored="true"/>

<dynamicField name="*_c" type="currency" indexed="true" stored="true"/>

<!-- Prefix-based dynamic fields. -->
<dynamicField name="ignored_*" type="ignored" multiValued="true"/>
<dynamicField name="attr_*" type="text_general" indexed="true" stored="true" multiValued="true"/>
<dynamicField name="random_*" type="random"/>



solrQueryParser:默认搜索操作符参数,即搜索短语间的逻辑关系。用 AND 可提高准确率,用 OR 可扩大覆盖面;建议使用 AND,也可以在搜索语句中显式指定。例如搜索“河西 万达”,当默认操作符为 AND 时,等价于搜索“河西 AND 万达”。注意:下面示例中配置的默认操作符是 OR。

<!-- Default operator applied between query terms: OR here.
     NOTE(review): newer Solr versions are understood to prefer the q.op
     request parameter over this element; confirm against the target version. -->
<solrQueryParser defaultOperator="OR"/>