1   
   2   
   3   
   4   
   5   
   6   
   7   
   8   
   9   
  10   
  11   
  12   
  13   
  14   
  15   
  16   
  17   
  18  r"""searchconnection.py: A connection to the search engine for searching. 
  19   
  20  """ 
  21  __docformat__ = "restructuredtext en" 
  22   
  23  import _checkxapian 
  24  import os as _os 
  25  import cPickle as _cPickle 
  26  import math 
  27   
  28  import xapian as _xapian 
  29  from datastructures import * 
  30  from fieldactions import * 
  31  import fieldmappings as _fieldmappings 
  32  import highlight as _highlight  
  33  import errors as _errors 
  34  import indexerconnection as _indexerconnection 
  35  import re as _re 
  36  from replaylog import log as _log 
  37   
  39      """A result from a search. 
  40   
  41      As well as being a ProcessedDocument representing the document in the 
  42      database, the result has several members which may be used to get 
  43      information about how well the document matches the search: 
  44   
  45       - `rank`: The rank of the document in the search results, starting at 0 
  46         (ie, 0 is the "top" result, 1 is the second result, etc). 
  47   
  48       - `weight`: A floating point number indicating the weight of the result 
  49         document.  The value is only meaningful relative to other results for a 
  50         given search - a different search, or the same search with a different 
  51         database, may give an entirely different scale to the weights.  This 
  52         should not usually be displayed to users, but may be useful if trying to 
  53         perform advanced reweighting operations on search results. 
  54   
  55       - `percent`: A percentage value for the weight of a document.  This is 
  56         just a rescaled form of the `weight` member.  It doesn't represent any 
  57         kind of probability value; the only real meaning of the numbers is that, 
  58         within a single set of results, a document with a higher percentage 
  59         corresponds to a better match.  Because the percentage doesn't really 
  60         represent a probability, or a confidence value, it is probably unhelpful 
  61         to display it to most users, since they tend to place an over emphasis 
  62         on its meaning.  However, it is included because it may be useful 
  63         occasionally. 
  64   
  65      """ 
  67          ProcessedDocument.__init__(self, results._fieldmappings, msetitem.document) 
  68          self.rank = msetitem.rank 
  69          self.weight = msetitem.weight 
  70          self.percent = msetitem.percent 
  71          self._results = results 
   72   
  74          """Get the language that should be used for a given field. 
  75   
  76          Raises a KeyError if the field is not known. 
  77   
  78          """ 
  79          actions = self._results._conn._field_actions[field]._actions 
  80          for action, kwargslist in actions.iteritems(): 
  81              if action == FieldActions.INDEX_FREETEXT: 
  82                  for kwargs in kwargslist: 
  83                      try: 
  84                          return kwargs['language'] 
  85                      except KeyError: 
  86                          pass 
  87          return 'none' 
   88   
  89 -    def summarise(self, field, maxlen=600, hl=('<b>', '</b>'), query=None): 
   90          """Return a summarised version of the field specified. 
  91   
  92          This will return a summary of the contents of the field stored in the 
  93          search result, with words which match the query highlighted. 
  94   
  95          The maximum length of the summary (in characters) may be set using the 
  96          maxlen parameter. 
  97   
  98          The return value will be a string holding the summary, with 
  99          highlighting applied.  If there are multiple instances of the field in 
 100          the document, the instances will be joined with a newline character. 
 101           
 102          To turn off highlighting, set hl to None.  Each highlight will consist 
 103          of the first entry in the `hl` list being placed before the word, and 
 104          the second entry in the `hl` list being placed after the word. 
 105   
 106          Any XML or HTML style markup tags in the field will be stripped before 
 107          the summarisation algorithm is applied. 
 108   
 109          If `query` is supplied, it should contain a Query object, as returned 
 110          from SearchConnection.query_parse() or related methods, which will be 
 111          used as the basis of the summarisation and highlighting rather than the 
 112          query which was used for the search. 
 113   
 114          Raises KeyError if the field is not known. 
 115   
 116          """ 
 117          highlighter = _highlight.Highlighter(language_code=self._get_language(field)) 
 118          field = self.data[field] 
 119          results = [] 
 120          text = '\n'.join(field) 
 121          if query is None: 
 122              query = self._results._query 
 123          return highlighter.makeSample(text, query, maxlen, hl) 
  124   
 125 -    def highlight(self, field, hl=('<b>', '</b>'), strip_tags=False, query=None): 
  126          """Return a highlighted version of the field specified. 
 127   
 128          This will return all the contents of the field stored in the search 
 129          result, with words which match the query highlighted. 
 130   
 131          The return value will be a list of strings (corresponding to the list 
 132          of strings which is the raw field data). 
 133   
 134          Each highlight will consist of the first entry in the `hl` list being 
 135          placed before the word, and the second entry in the `hl` list being 
 136          placed after the word. 
 137   
 138          If `strip_tags` is True, any XML or HTML style markup tags in the field 
 139          will be stripped before highlighting is applied. 
 140   
 141          If `query` is supplied, it should contain a Query object, as returned 
 142          from SearchConnection.query_parse() or related methods, which will be 
 143          used as the basis of the summarisation and highlighting rather than the 
 144          query which was used for the search. 
 145   
 146          Raises KeyError if the field is not known. 
 147   
 148          """ 
 149          highlighter = _highlight.Highlighter(language_code=self._get_language(field)) 
 150          field = self.data[field] 
 151          results = [] 
 152          if query is None: 
 153              query = self._results._query 
 154          for text in field: 
 155              results.append(highlighter.highlight(text, query, hl, strip_tags)) 
 156          return results 
  157   
 159          return ('<SearchResult(rank=%d, id=%r, data=%r)>' % 
 160                  (self.rank, self.id, self.data)) 
   161   
 162   
 164      """An iterator over a set of results from a search. 
 165   
 166      """ 
 168          self._results = results 
 169          self._order = order 
 170          if self._order is None: 
 171              self._iter = iter(results._mset) 
 172          else: 
 173              self._iter = iter(self._order) 
  174   
 176          if self._order is None: 
 177              msetitem = self._iter.next() 
 178          else: 
 179              index = self._iter.next() 
 180              msetitem = self._results._mset.get_hit(index) 
 181          return SearchResult(msetitem, self._results) 
   182   
 183   
 185      """Get the significant digits of value which are constrained by the 
 186      (inclusive) lower and upper bounds. 
 187   
 188      If there are no significant digits which are definitely within the 
 189      bounds, exactly one significant digit will be returned in the result. 
 190   
 191      >>> _get_significant_digits(15,15,15) 
 192      15 
 193      >>> _get_significant_digits(15,15,17) 
 194      20 
 195      >>> _get_significant_digits(4777,208,6000) 
 196      5000 
 197      >>> _get_significant_digits(4777,4755,4790) 
 198      4800 
 199      >>> _get_significant_digits(4707,4695,4710) 
 200      4700 
 201      >>> _get_significant_digits(4719,4717,4727) 
 202      4720 
 203      >>> _get_significant_digits(0,0,0) 
 204      0 
 205      >>> _get_significant_digits(9,9,10) 
 206      9 
 207      >>> _get_significant_digits(9,9,100) 
 208      9 
 209   
 210      """ 
 211      assert(lower <= value) 
 212      assert(value <= upper) 
 213      diff = upper - lower 
 214   
 215       
 216       
 217      if diff == 0: 
 218          pos_pow_10 = 1 
 219      else: 
 220          pos_pow_10 = int(10 ** math.ceil(math.log10(diff))) 
 221   
 222       
 223       
 224      if pos_pow_10 > value: 
 225          if value == 0: 
 226              pos_pow_10 = 1 
 227          else: 
 228              pos_pow_10 = int(10 ** math.floor(math.log10(value))) 
 229   
 230       
 231      return ((value + pos_pow_10 // 2) // pos_pow_10) * pos_pow_10 
  232   
 234      """A set of results of a search. 
 235   
 236      """ 
 237 -    def __init__(self, conn, enq, query, mset, fieldmappings, tagspy, 
 238                   tagfields, facetspy, facetfields, facethierarchy, 
 239                   facetassocs): 
  240          self._conn = conn 
 241          self._enq = enq 
 242          self._query = query 
 243          self._mset = mset 
 244          self._mset_order = None 
 245          self._fieldmappings = fieldmappings 
 246          self._tagspy = tagspy 
 247          if tagfields is None: 
 248              self._tagfields = None 
 249          else: 
 250              self._tagfields = set(tagfields) 
 251          self._facetspy = facetspy 
 252          self._facetfields = facetfields 
 253          self._facethierarchy = facethierarchy 
 254          self._facetassocs = facetassocs 
 255          self._numeric_ranges_built = {} 
  256   
 257 -    def _cluster(self, num_clusters, maxdocs, fields=None): 
  258          """Cluster results based on similarity. 
 259   
 260          Note: this method is experimental, and will probably disappear or 
 261          change in the future. 
 262   
 263          The number of clusters is specified by num_clusters: unless there are 
 264          too few results, there will be exaclty this number of clusters in the 
 265          result. 
 266   
 267          """ 
 268          clusterer = _xapian.ClusterSingleLink() 
 269          xapclusters = _xapian.ClusterAssignments() 
 270          docsim = _xapian.DocSimCosine() 
 271          source = _xapian.MSetDocumentSource(self._mset, maxdocs) 
 272   
 273          if fields is None: 
 274              clusterer.cluster(self._conn._index, xapclusters, docsim, source, num_clusters) 
 275          else: 
 276              decider = self._make_expand_decider(fields) 
 277              clusterer.cluster(self._conn._index, xapclusters, docsim, source, decider, num_clusters) 
 278   
 279          newid = 0 
 280          idmap = {} 
 281          clusters = {} 
 282          for item in self._mset: 
 283              docid = item.docid 
 284              clusterid = xapclusters.cluster(docid) 
 285              if clusterid not in idmap: 
 286                  idmap[clusterid] = newid 
 287                  newid += 1 
 288              clusterid = idmap[clusterid] 
 289              if clusterid not in clusters: 
 290                  clusters[clusterid] = [] 
 291              clusters[clusterid].append(item.rank) 
 292          return clusters 
  293   
 295          """Reorder the mset based on some clusters. 
 296   
 297          """ 
 298          if self.startrank != 0: 
 299              raise _errors.SearchError("startrank must be zero to reorder by clusters") 
 300          reordered = False 
 301          tophits = [] 
 302          nottophits = [] 
 303   
 304          clusterstarts = dict(((c[0], None) for c in clusters.itervalues())) 
 305          for i in xrange(self.endrank): 
 306              if i in clusterstarts: 
 307                  tophits.append(i) 
 308              else: 
 309                  nottophits.append(i) 
 310          self._mset_order = tophits 
 311          self._mset_order.extend(nottophits) 
  312   
 314          """Make an expand decider which accepts only terms in the specified 
 315          field. 
 316   
 317          """ 
 318          prefixes = {} 
 319          if isinstance(fields, basestring): 
 320              fields = [fields] 
 321          for field in fields: 
 322              try: 
 323                  actions = self._conn._field_actions[field]._actions 
 324              except KeyError: 
 325                  continue 
 326              for action, kwargslist in actions.iteritems(): 
 327                  if action == FieldActions.INDEX_FREETEXT: 
 328                      prefix = self._conn._field_mappings.get_prefix(field) 
 329                      prefixes[prefix] = None 
 330                      prefixes['Z' + prefix] = None 
 331                  if action in (FieldActions.INDEX_EXACT, 
 332                                FieldActions.TAG, 
 333                                FieldActions.FACET,): 
 334                      prefix = self._conn._field_mappings.get_prefix(field) 
 335                      prefixes[prefix] = None 
 336          prefix_re = _re.compile('|'.join([_re.escape(x) + '[^A-Z]' for x in prefixes.keys()])) 
 337          class decider(_xapian.ExpandDecider): 
 338              def __call__(self, term): 
 339                  return prefix_re.match(term) is not None 
   340          return decider() 
 341   
 344          """Reorder results based on similarity. 
 345   
 346          The top `count` documents will be chosen such that they are relatively 
 347          dissimilar.  `maxcount` documents will be considered for moving around, 
 348          and `max_similarity` is a value between 0 and 1 indicating the maximum 
 349          similarity to the previous document before a document is moved down the 
 350          result set. 
 351   
 352          Note: this method is experimental, and will probably disappear or 
 353          change in the future. 
 354   
 355          """ 
 356          if self.startrank != 0: 
 357              raise _errors.SearchError("startrank must be zero to reorder by similiarity") 
 358          ds = _xapian.DocSimCosine() 
 359          ds.set_termfreqsource(_xapian.DatabaseTermFreqSource(self._conn._index)) 
 360   
 361          if fields is not None: 
 362              ds.set_expand_decider(self._make_expand_decider(fields)) 
 363   
 364          tophits = [] 
 365          nottophits = [] 
 366          full = False 
 367          reordered = False 
 368   
 369          sim_count = 0 
 370          new_order = [] 
 371          end = min(self.endrank, maxcount) 
 372          for i in xrange(end): 
 373              if full: 
 374                  new_order.append(i) 
 375                  continue 
 376              hit = self._mset.get_hit(i) 
 377              if len(tophits) == 0: 
 378                  tophits.append(hit) 
 379                  continue 
 380   
 381               
 382              maxsim = 0.0 
 383              for tophit in tophits[-1:]: 
 384                  sim_count += 1 
 385                  sim = ds.similarity(hit.document, tophit.document) 
 386                  if sim > maxsim: 
 387                      maxsim = sim 
 388   
 389               
 390              if maxsim < max_similarity: 
 391                  tophits.append(hit) 
 392              else: 
 393                  nottophits.append(hit) 
 394                  reordered = True 
 395   
 396               
 397              if len(tophits) >= count: 
 398                  for hit in tophits: 
 399                      new_order.append(hit.rank) 
 400                  for hit in nottophits: 
 401                      new_order.append(hit.rank) 
 402                  full = True 
 403          if not full: 
 404              for hit in tophits: 
 405                  new_order.append(hit.rank) 
 406              for hit in nottophits: 
 407                  new_order.append(hit.rank) 
 408          if end != self.endrank: 
 409              new_order.extend(range(end, self.endrank)) 
 410          assert len(new_order) == self.endrank 
 411          if reordered: 
 412              self._mset_order = new_order 
 413          else: 
 414              assert new_order == range(self.endrank) 
  415   
 417          return ("<SearchResults(startrank=%d, " 
 418                  "endrank=%d, " 
 419                  "more_matches=%s, " 
 420                  "matches_lower_bound=%d, " 
 421                  "matches_upper_bound=%d, " 
 422                  "matches_estimated=%d, " 
 423                  "estimate_is_exact=%s)>" % 
 424                  ( 
 425                   self.startrank, 
 426                   self.endrank, 
 427                   self.more_matches, 
 428                   self.matches_lower_bound, 
 429                   self.matches_upper_bound, 
 430                   self.matches_estimated, 
 431                   self.estimate_is_exact, 
 432                  )) 
  433   
 438      more_matches = property(_get_more_matches, doc= 
 439      """Check whether there are further matches after those in this result set. 
 440   
 441      """) 
 442   
 444          return self._mset.get_firstitem() 
  445      startrank = property(_get_startrank, doc= 
 446      """Get the rank of the first item in the search results. 
 447   
 448      This corresponds to the "startrank" parameter passed to the search() method. 
 449   
 450      """) 
 451   
 453          return self._mset.get_firstitem() + len(self._mset) 
  454      endrank = property(_get_endrank, doc= 
 455      """Get the rank of the item after the end of the search results. 
 456   
 457      If there are sufficient results in the index, this corresponds to the 
 458      "endrank" parameter passed to the search() method. 
 459   
 460      """) 
 461   
 463          return self._mset.get_matches_lower_bound() 
  464      matches_lower_bound = property(_get_lower_bound, doc= 
 465      """Get a lower bound on the total number of matching documents. 
 466   
 467      """) 
 468   
 470          return self._mset.get_matches_upper_bound() 
  471      matches_upper_bound = property(_get_upper_bound, doc= 
 472      """Get an upper bound on the total number of matching documents. 
 473   
 474      """) 
 475   
 477          lower = self._mset.get_matches_lower_bound() 
 478          upper = self._mset.get_matches_upper_bound() 
 479          est = self._mset.get_matches_estimated() 
 480          return _get_significant_digits(est, lower, upper) 
  481      matches_human_readable_estimate = property(_get_human_readable_estimate, 
 482                                                 doc= 
 483      """Get a human readable estimate of the number of matching documents. 
 484   
 485      This consists of the value returned by the "matches_estimated" property, 
 486      rounded to an appropriate number of significant digits (as determined by 
 487      the values of the "matches_lower_bound" and "matches_upper_bound" 
 488      properties). 
 489   
 490      """) 
 491   
 493          return self._mset.get_matches_estimated() 
  494      matches_estimated = property(_get_estimated, doc= 
 495      """Get an estimate for the total number of matching documents. 
 496   
 497      """) 
 498   
 500          return self._mset.get_matches_lower_bound() == \ 
 501                 self._mset.get_matches_upper_bound() 
  502      estimate_is_exact = property(_estimate_is_exact, doc= 
 503      """Check whether the estimated number of matching documents is exact. 
 504   
 505      If this returns true, the estimate given by the `matches_estimated` 
 506      property is guaranteed to be correct. 
 507   
 508      If this returns false, it is possible that the actual number of matching 
 509      documents is different from the number given by the `matches_estimated` 
 510      property. 
 511   
 512      """) 
 513   
 515          """Get the hit with a given index. 
 516   
 517          """ 
 518          if self._mset_order is None: 
 519              msetitem = self._mset.get_hit(index) 
 520          else: 
 521              msetitem = self._mset.get_hit(self._mset_order[index]) 
 522          return SearchResult(msetitem, self) 
  523      __getitem__ = get_hit 
 524   
 526          """Get an iterator over the hits in the search result. 
 527   
 528          The iterator returns the results in increasing order of rank. 
 529   
 530          """ 
 531          return SearchResultIter(self, self._mset_order) 
  532   
 534          """Get the number of hits in the search result. 
 535   
 536          Note that this is not (usually) the number of matching documents for 
 537          the search.  If startrank is non-zero, it's not even the rank of the 
 538          last document in the search result.  It's simply the number of hits 
 539          stored in the search result. 
 540   
 541          It is, however, the number of items returned by the iterator produced 
 542          by calling iter() on this SearchResults object. 
 543   
 544          """ 
 545          return len(self._mset) 
  546   
 565   
 566 -    def get_suggested_facets(self, maxfacets=5, desired_num_of_categories=7, 
 567                               required_facets=None): 
  568          """Get a suggested set of facets, to present to the user. 
 569   
 570          This returns a list, in descending order of the usefulness of the 
 571          facet, in which each item is a tuple holding: 
 572   
 573           - fieldname of facet. 
 574           - sequence of 2-tuples holding the suggested values or ranges for that 
 575             field: 
 576   
 577             For facets of type 'string', the first item in the 2-tuple will 
 578             simply be the string supplied when the facet value was added to its 
 579             document.  For facets of type 'float', it will be a 2-tuple, holding 
 580             floats giving the start and end of the suggested value range. 
 581   
 582             The second item in the 2-tuple will be the frequency of the facet 
 583             value or range in the result set. 
 584   
 585          If required_facets is not None, it must be a field name, or a sequence 
 586          of field names.  Any field names mentioned in required_facets will be 
 587          returned if there are any facet values at all in the search results for 
 588          that field.  The facet will only be omitted if there are no facet 
 589          values at all for the field. 
 590   
 591          The value of maxfacets will be respected as far as possible; the 
 592          exception is that if there are too many fields listed in 
 593          required_facets with at least one value in the search results, extra 
 594          facets will be returned (ie, obeying the required_facets parameter is 
 595          considered more important than the maxfacets parameter). 
 596   
 597          If facet_hierarchy was indicated when search() was called, and the 
 598          query included facets, then only subfacets of those query facets and 
 599          top-level facets will be included in the returned list. Furthermore 
 600          top-level facets will only be returned if there are remaining places 
 601          in the list after it has been filled with subfacets. Note that 
 602          required_facets is still respected regardless of the facet hierarchy. 
 603   
 604          If a query type was specified when search() was called, and the query 
 605          included facets, then facets with an association of Never to the 
 606          query type are never returned, even if mentioned in required_facets. 
 607          Facets with an association of Preferred are listed before others in 
 608          the returned list. 
 609   
 610          """ 
 611          if 'facets' in _checkxapian.missing_features: 
 612              raise errors.SearchError("Facets unsupported with this release of xapian") 
 613          if self._facetspy is None: 
 614              raise _errors.SearchError("Facet selection wasn't enabled when the search was run") 
 615          if isinstance(required_facets, basestring): 
 616              required_facets = [required_facets] 
 617          scores = [] 
 618          facettypes = {} 
 619          for field, slot, kwargslist in self._facetfields: 
 620              type = None 
 621              for kwargs in kwargslist: 
 622                  type = kwargs.get('type', None) 
 623                  if type is not None: break 
 624              if type is None: type = 'string' 
 625   
 626              if type == 'float': 
 627                  if field not in self._numeric_ranges_built: 
 628                      self._facetspy.build_numeric_ranges(slot, desired_num_of_categories) 
 629                      self._numeric_ranges_built[field] = None 
 630              facettypes[field] = type 
 631              score = self._facetspy.score_categorisation(slot, desired_num_of_categories) 
 632              scores.append((score, field, slot)) 
 633   
 634           
 635           
 636          if self._facethierarchy: 
 637               
 638              scores = [(tuple[-2] not in self._facethierarchy,) + tuple for tuple in scores] 
 639          if self._facetassocs: 
 640              preferred = _indexerconnection.IndexerConnection.FacetQueryType_Preferred 
 641              scores = [(self._facetassocs.get(tuple[-2]) != preferred,) + tuple for tuple in scores] 
 642          scores.sort() 
 643          if self._facethierarchy: 
 644              index = 1 
 645          else: 
 646              index = 0 
 647          if self._facetassocs: 
 648              index += 1 
 649          if index > 0: 
 650              scores = [tuple[index:] for tuple in scores] 
 651   
 652          results = [] 
 653          required_results = [] 
 654          for score, field, slot in scores: 
 655               
 656              required = False 
 657              if required_facets is not None: 
 658                  required = field in required_facets 
 659   
 660               
 661              if not required and len(results) + len(required_results) >= maxfacets: 
 662                  continue 
 663   
 664               
 665              values = self._facetspy.get_values_as_dict(slot) 
 666              if field in self._numeric_ranges_built: 
 667                  if '' in values: 
 668                      del values[''] 
 669   
 670               
 671               
 672              if required: 
 673                  if len(values) < 1: 
 674                      continue 
 675              else: 
 676                  if len(values) <= 1: 
 677                      continue 
 678   
 679              newvalues = [] 
 680              if facettypes[field] == 'float': 
 681                   
 682                   
 683                  for value, frequency in values.iteritems(): 
 684                      if len(value) <= 9: 
 685                          value1 = _log(_xapian.sortable_unserialise, value) 
 686                          value2 = value1 
 687                      else: 
 688                          value1 = _log(_xapian.sortable_unserialise, value[:9]) 
 689                          value2 = _log(_xapian.sortable_unserialise, value[9:]) 
 690                      newvalues.append(((value1, value2), frequency)) 
 691              else: 
 692                  for value, frequency in values.iteritems(): 
 693                      newvalues.append((value, frequency)) 
 694   
 695              newvalues.sort() 
 696              if required: 
 697                  required_results.append((score, field, newvalues)) 
 698              else: 
 699                  results.append((score, field, newvalues)) 
 700   
 701           
 702           
 703          maxfacets = maxfacets - len(required_results) 
 704          if maxfacets <= 0: 
 705              results = required_results 
 706          else: 
 707              results = results[:maxfacets] 
 708              results.extend(required_results) 
 709              results.sort() 
 710   
 711           
 712           
 713          results = [(field, newvalues) for (score, field, newvalues) in results] 
 714          return results 
  715   
 716   
 718      """A connection to the search engine for searching. 
 719   
 720      The connection will access a view of the database. 
 721   
 722      """ 
 723      _qp_flags_base = _xapian.QueryParser.FLAG_LOVEHATE 
 724      _qp_flags_phrase = _xapian.QueryParser.FLAG_PHRASE 
 725      _qp_flags_synonym = (_xapian.QueryParser.FLAG_AUTO_SYNONYMS | 
 726                           _xapian.QueryParser.FLAG_AUTO_MULTIWORD_SYNONYMS) 
 727      _qp_flags_bool = _xapian.QueryParser.FLAG_BOOLEAN 
 728   
 729      _index = None 
 730   
 732          """Create a new connection to the index for searching. 
 733   
 734          There may only an arbitrary number of search connections for a 
 735          particular database open at a given time (regardless of whether there 
 736          is a connection for indexing open as well). 
 737   
 738          If the database doesn't exist, an exception will be raised. 
 739   
 740          """ 
 741          self._index = _log(_xapian.Database, indexpath) 
 742          self._indexpath = indexpath 
 743   
 744           
 745          self._load_config() 
 746   
 747          self._close_handlers = [] 
  748   
 751   
 753          """Append a callback to the list of close handlers. 
 754   
 755          These will be called when the SearchConnection is closed.  This happens 
 756          when the close() method is called, or when the SearchConnection object 
 757          is deleted.  The callback will be passed two arguments: the path to the 
 758          SearchConnection object, and the userdata supplied to this method. 
 759   
 760          The handlers will be called in the order in which they were added. 
 761   
 762          The handlers will be called after the connection has been closed, so 
 763          cannot prevent it closing: their return value will be ignored.  In 
 764          addition, they should not raise any exceptions. 
 765   
 766          """ 
 767          self._close_handlers.append((handler, userdata)) 
  768   
 770          """Get the sort type that should be used for a given field. 
 771   
 772          """ 
 773          try: 
 774              actions = self._field_actions[field]._actions 
 775          except KeyError: 
 776              actions = {} 
 777          for action, kwargslist in actions.iteritems(): 
 778              if action == FieldActions.SORT_AND_COLLAPSE: 
 779                  for kwargs in kwargslist: 
 780                      return kwargs['type'] 
  781   
 783          """Load the configuration for the database. 
 784   
 785          """ 
 786           
 787           
 788          assert self._index is not None 
 789   
 790          config_str = _log(self._index.get_metadata, '_xappy_config') 
 791          if len(config_str) == 0: 
 792              self._field_actions = {} 
 793              self._field_mappings = _fieldmappings.FieldMappings() 
 794              self._facet_hierarchy = {} 
 795              self._facet_query_table = {} 
 796              return 
 797   
 798          try: 
 799              (self._field_actions, mappings, self._facet_hierarchy, self._facet_query_table, self._next_docid) = _cPickle.loads(config_str) 
 800          except ValueError: 
 801               
 802              (self._field_actions, mappings, self._next_docid) = _cPickle.loads(config_str) 
 803              self._facet_hierarchy = {} 
 804              self._facet_query_table = {} 
 805          self._field_mappings = _fieldmappings.FieldMappings(mappings) 
  806   
 808          """Reopen the connection. 
 809   
 810          This updates the revision of the index which the connection references 
 811          to the latest flushed revision. 
 812   
 813          """ 
 814          if self._index is None: 
 815              raise _errors.SearchError("SearchConnection has been closed") 
 816          self._index.reopen() 
 817           
 818          self._load_config() 
  819           
 821          """Close the connection to the database. 
 822   
 823          It is important to call this method before allowing the class to be 
 824          garbage collected to ensure that the connection is cleaned up promptly. 
 825   
 826          No other methods may be called on the connection after this has been 
 827          called.  (It is permissible to call close() multiple times, but 
 828          only the first call will have any effect.) 
 829   
 830          If an exception occurs, the database will be closed, but changes since 
 831          the last call to flush may be lost. 
 832   
 833          """ 
 834          if self._index is None: 
 835              return 
 836   
 837           
 838          indexpath = self._indexpath 
 839   
 840           
 841           
 842           
 843           
 844           
 845           
 846           
 847          self._index = None 
 848          self._indexpath = None 
 849          self._field_actions = None 
 850          self._field_mappings = None 
 851   
 852           
 853          for handler, userdata in self._close_handlers: 
 854              try: 
 855                  handler(indexpath, userdata) 
 856              except Exception, e: 
 857                  import sys, traceback 
 858                  print >>sys.stderr, "WARNING: unhandled exception in handler called by SearchConnection.close(): %s" % traceback.format_exception_only(type(e), e) 
  859   
 861          """Count the number of documents in the database. 
 862   
 863          This count will include documents which have been added or removed but 
 864          not yet flushed(). 
 865   
 866          """ 
 867          if self._index is None: 
 868              raise _errors.SearchError("SearchConnection has been closed") 
 869          return self._index.get_doccount() 
  870   
 871      OP_AND = _xapian.Query.OP_AND 
 872      OP_OR = _xapian.Query.OP_OR 
 874          """Build a composite query from a list of queries. 
 875   
 876          The queries are combined with the supplied operator, which is either 
 877          SearchConnection.OP_AND or SearchConnection.OP_OR. 
 878   
 879          """ 
 880          if self._index is None: 
 881              raise _errors.SearchError("SearchConnection has been closed") 
 882          return _log(_xapian.Query, operator, list(queries)) 
  883   
 885          """Build a query which modifies the weights of a subquery. 
 886   
 887          This produces a query which returns the same documents as the subquery, 
 888          and in the same order, but with the weights assigned to each document 
 889          multiplied by the value of "multiplier".  "multiplier" may be any floating 
 890          point value, but negative values will be clipped to 0, since Xapian 
 891          doesn't support negative weights. 
 892   
 893          This can be useful when producing queries to be combined with 
 894          query_composite, because it allows the relative importance of parts of 
 895          the query to be adjusted. 
 896   
 897          """ 
 898          return _log(_xapian.Query, _xapian.Query.OP_SCALE_WEIGHT, query, multiplier) 
  899   
 901          """Filter a query with another query. 
 902   
 903          If exclude is False (or not specified), documents will only match the 
 904          resulting query if they match the both the first and second query: the 
 905          results of the first query are "filtered" to only include those which 
 906          also match the second query. 
 907   
 908          If exclude is True, documents will only match the resulting query if 
 909          they match the first query, but not the second query: the results of 
 910          the first query are "filtered" to only include those which do not match 
 911          the second query. 
 912           
 913          Documents will always be weighted according to only the first query. 
 914   
 915          - `query`: The query to filter. 
 916          - `filter`: The filter to apply to the query. 
 917          - `exclude`: If True, the sense of the filter is reversed - only 
 918            documents which do not match the second query will be returned.  
 919   
 920          """ 
 921          if self._index is None: 
 922              raise _errors.SearchError("SearchConnection has been closed") 
 923          if not isinstance(filter, _xapian.Query): 
 924              raise _errors.SearchError("Filter must be a Xapian Query object") 
 925          if exclude: 
 926              return _log(_xapian.Query, _xapian.Query.OP_AND_NOT, query, filter) 
 927          else: 
 928              return _log(_xapian.Query, _xapian.Query.OP_FILTER, query, filter) 
  929   
 931          """Adjust the weights of one query with a secondary query. 
 932   
 933          Documents will be returned from the resulting query if and only if they 
 934          match the primary query (specified by the "primary" parameter). 
 935          However, the weights (and hence, the relevance rankings) of the 
 936          documents will be adjusted by adding weights from the secondary query 
 937          (specified by the "secondary" parameter). 
 938   
 939          """ 
 940          if self._index is None: 
 941              raise _errors.SearchError("SearchConnection has been closed") 
 942          return _log(_xapian.Query, _xapian.Query.OP_AND_MAYBE, primary, secondary) 
  943   
 945          """Create a query for a range search. 
 946           
 947          This creates a query which matches only those documents which have a 
 948          field value in the specified range. 
 949   
 950          Begin and end must be appropriate values for the field, according to 
 951          the 'type' parameter supplied to the SORTABLE action for the field. 
 952   
 953          The begin and end values are both inclusive - any documents with a 
 954          value equal to begin or end will be returned (unless end is less than 
 955          begin, in which case no documents will be returned). 
 956   
 957          Begin or end may be set to None in order to create an open-ended 
 958          range.  (They may also both be set to None, which will generate a query 
 959          which matches all documents containing any value for the field.) 
 960   
 961          """ 
 962          if self._index is None: 
 963              raise _errors.SearchError("SearchConnection has been closed") 
 964   
 965          if begin is None and end is None: 
 966               
 967              return _log(_xapian.Query, '') 
 968   
 969          try: 
 970              slot = self._field_mappings.get_slot(field, 'collsort') 
 971          except KeyError: 
 972               
 973              return _log(_xapian.Query) 
 974   
 975          sorttype = self._get_sort_type(field) 
 976          marshaller = SortableMarshaller(False) 
 977          fn = marshaller.get_marshall_function(field, sorttype) 
 978   
 979          if begin is not None: 
 980              begin = fn(field, begin) 
 981          if end is not None: 
 982              end = fn(field, end) 
 983   
 984          if begin is None: 
 985              return _log(_xapian.Query, _xapian.Query.OP_VALUE_LE, slot, end) 
 986   
 987          if end is None: 
 988              return _log(_xapian.Query, _xapian.Query.OP_VALUE_GE, slot, begin) 
 989   
 990          return _log(_xapian.Query, _xapian.Query.OP_VALUE_RANGE, slot, begin, end) 
  991   
 993          """Create a query for a facet value. 
 994           
 995          This creates a query which matches only those documents which have a 
 996          facet value in the specified range. 
 997   
 998          For a numeric range facet, val should be a tuple holding the start and 
 999          end of the range, or a comma separated string holding two floating 
1000          point values.  For other facets, val should be the value to look 
1001          for. 
1002   
1003          The start and end values are both inclusive - any documents with a 
1004          value equal to start or end will be returned (unless end is less than 
1005          start, in which case no documents will be returned). 
1006   
1007          """ 
1008          if self._index is None: 
1009              raise _errors.SearchError("SearchConnection has been closed") 
1010          if 'facets' in _checkxapian.missing_features: 
1011              raise errors.SearchError("Facets unsupported with this release of xapian") 
1012   
1013          try: 
1014              actions = self._field_actions[field]._actions 
1015          except KeyError: 
1016              actions = {} 
1017          facettype = None 
1018          for action, kwargslist in actions.iteritems(): 
1019              if action == FieldActions.FACET: 
1020                  for kwargs in kwargslist: 
1021                      facettype = kwargs.get('type', None) 
1022                      if facettype is not None: 
1023                          break 
1024              if facettype is not None: 
1025                  break 
1026   
1027          if facettype == 'float': 
1028              if isinstance(val, basestring): 
1029                  val = [float(v) for v in val.split(',', 2)] 
1030              assert(len(val) == 2) 
1031              try: 
1032                  slot = self._field_mappings.get_slot(field, 'facet') 
1033              except KeyError: 
1034                  return _log(_xapian.Query) 
1035               
1036              sorttype = 'float' 
1037              marshaller = SortableMarshaller(False) 
1038              fn = marshaller.get_marshall_function(field, sorttype) 
1039              begin = fn(field, val[0]) 
1040              end = fn(field, val[1]) 
1041              return _log(_xapian.Query, _xapian.Query.OP_VALUE_RANGE, slot, begin, end) 
1042          else: 
1043              assert(facettype == 'string' or facettype is None) 
1044              prefix = self._field_mappings.get_prefix(field) 
1045              return _log(_xapian.Query, prefix + val.lower()) 
 1046   
1047   
1050          """Prepare (and return) a query parser using the specified fields and 
1051          operator. 
1052   
1053          """ 
1054          if self._index is None: 
1055              raise _errors.SearchError("SearchConnection has been closed") 
1056   
1057          if isinstance(allow, basestring): 
1058              allow = (allow, ) 
1059          if isinstance(deny, basestring): 
1060              deny = (deny, ) 
1061          if allow is not None and len(allow) == 0: 
1062              allow = None 
1063          if deny is not None and len(deny) == 0: 
1064              deny = None 
1065          if allow is not None and deny is not None: 
1066              raise _errors.SearchError("Cannot specify both `allow` and `deny` " 
1067                                        "(got %r and %r)" % (allow, deny)) 
1068   
1069          if isinstance(default_allow, basestring): 
1070              default_allow = (default_allow, ) 
1071          if isinstance(default_deny, basestring): 
1072              default_deny = (default_deny, ) 
1073          if default_allow is not None and len(default_allow) == 0: 
1074              default_allow = None 
1075          if default_deny is not None and len(default_deny) == 0: 
1076              default_deny = None 
1077          if default_allow is not None and default_deny is not None: 
1078              raise _errors.SearchError("Cannot specify both `default_allow` and `default_deny` " 
1079                                        "(got %r and %r)" % (default_allow, default_deny)) 
1080   
1081          qp = _log(_xapian.QueryParser) 
1082          qp.set_database(self._index) 
1083          qp.set_default_op(default_op) 
1084   
1085          if allow is None: 
1086              allow = [key for key in self._field_actions] 
1087          if deny is not None: 
1088              allow = [key for key in allow if key not in deny] 
1089   
1090          for field in allow: 
1091              try: 
1092                  actions = self._field_actions[field]._actions 
1093              except KeyError: 
1094                  actions = {} 
1095              for action, kwargslist in actions.iteritems(): 
1096                  if action == FieldActions.INDEX_EXACT: 
1097                       
1098                       
1099                      qp.add_prefix(field, self._field_mappings.get_prefix(field)) 
1100                  if action == FieldActions.INDEX_FREETEXT: 
1101                      allow_field_specific = True 
1102                      for kwargs in kwargslist: 
1103                          allow_field_specific = allow_field_specific or kwargs.get('allow_field_specific', True) 
1104                      if not allow_field_specific: 
1105                          continue 
1106                      qp.add_prefix(field, self._field_mappings.get_prefix(field)) 
1107                      for kwargs in kwargslist: 
1108                          try: 
1109                              lang = kwargs['language'] 
1110                              my_stemmer = _log(_xapian.Stem, lang) 
1111                              qp.my_stemmer = my_stemmer 
1112                              qp.set_stemmer(my_stemmer) 
1113                              qp.set_stemming_strategy(qp.STEM_SOME) 
1114                          except KeyError: 
1115                              pass 
1116   
1117          if default_allow is not None or default_deny is not None: 
1118              if default_allow is None: 
1119                  default_allow = [key for key in self._field_actions] 
1120              if default_deny is not None: 
1121                  default_allow = [key for key in default_allow if key not in default_deny] 
1122              for field in default_allow: 
1123                  try: 
1124                      actions = self._field_actions[field]._actions 
1125                  except KeyError: 
1126                      actions = {} 
1127                  for action, kwargslist in actions.iteritems(): 
1128                      if action == FieldActions.INDEX_FREETEXT: 
1129                          qp.add_prefix('', self._field_mappings.get_prefix(field)) 
1130                           
1131   
1132          return qp 
 1133   
1135          """Parse a query, with an optional prefix. 
1136   
1137          """ 
1138          if prefix is None: 
1139              return qp.parse_query(string, flags) 
1140          else: 
1141              return qp.parse_query(string, flags, prefix) 
 1142   
1144          """Parse a query with various flags. 
1145           
1146          If the initial boolean pass fails, fall back to not using boolean 
1147          operators. 
1148   
1149          """ 
1150          try: 
1151              q1 = self._query_parse_with_prefix(qp, string, 
1152                                                 self._qp_flags_base | 
1153                                                 self._qp_flags_phrase | 
1154                                                 self._qp_flags_synonym | 
1155                                                 self._qp_flags_bool, 
1156                                                 prefix) 
1157          except _xapian.QueryParserError, e: 
1158               
1159               
1160              q1 = self._query_parse_with_prefix(qp, string, 
1161                                                 self._qp_flags_base | 
1162                                                 self._qp_flags_phrase | 
1163                                                 self._qp_flags_synonym, 
1164                                                 prefix) 
1165   
1166          qp.set_stemming_strategy(qp.STEM_NONE) 
1167          try: 
1168              q2 = self._query_parse_with_prefix(qp, string, 
1169                                                 self._qp_flags_base | 
1170                                                 self._qp_flags_bool, 
1171                                                 prefix) 
1172          except _xapian.QueryParserError, e: 
1173               
1174               
1175              q2 = self._query_parse_with_prefix(qp, string, 
1176                                                 self._qp_flags_base, 
1177                                                 prefix) 
1178   
1179          return _log(_xapian.Query, _xapian.Query.OP_AND_MAYBE, q1, q2) 
 1180   
1181 -    def query_parse(self, string, allow=None, deny=None, default_op=OP_AND, 
1182                      default_allow=None, default_deny=None): 
 1183          """Parse a query string. 
1184   
1185          This is intended for parsing queries entered by a user.  If you wish to 
1186          combine structured queries, it is generally better to use the other 
1187          query building methods, such as `query_composite` (though you may wish 
1188          to create parts of the query to combine with such methods with this 
1189          method). 
1190   
1191          The string passed to this method can have various operators in it.  In 
1192          particular, it may contain field specifiers (ie, field names, followed 
1193          by a colon, followed by some text to search for in that field).  For 
1194          example, if "author" is a field in the database, the search string 
1195          could contain "author:richard", and this would be interpreted as 
1196          "search for richard in the author field".  By default, any fields in 
1197          the database which are indexed with INDEX_EXACT or INDEX_FREETEXT will 
1198          be available for field specific searching in this way - however, this 
1199          can be modified using the "allow" or "deny" parameters, and also by the 
1200          allow_field_specific tag on INDEX_FREETEXT fields. 
1201   
1202          Any text which isn't prefixed by a field specifier is used to search 
1203          the "default set" of fields.  By default, this is the full set of 
1204          fields in the database which are indexed with INDEX_FREETEXT and for 
1205          which the search_by_default flag set (ie, if the text is found in any 
1206          of those fields, the query will match).  However, this may be modified 
1207          with the "default_allow" and "default_deny" parameters.  (Note that 
1208          fields which are indexed with INDEX_EXACT aren't allowed to be used in 
1209          the default list of fields.) 
1210   
1211          - `string`: The string to parse. 
1212          - `allow`: A list of fields to allow in the query. 
1213          - `deny`: A list of fields not to allow in the query. 
1214          - `default_op`: The default operator to combine query terms with. 
1215          - `default_allow`: A list of fields to search for by default. 
1216          - `default_deny`: A list of fields not to search for by default. 
1217   
1218          Only one of `allow` and `deny` may be specified. 
1219   
1220          Only one of `default_allow` and `default_deny` may be specified. 
1221   
1222          If any of the entries in `allow` are not present in the configuration 
1223          for the database, or are not specified for indexing (either as 
1224          INDEX_EXACT or INDEX_FREETEXT), they will be ignored.  If any of the 
1225          entries in `deny` are not present in the configuration for the 
1226          database, they will be ignored. 
1227   
1228          Returns a Query object, which may be passed to the search() method, or 
1229          combined with other queries. 
1230   
1231          """ 
1232          qp = self._prepare_queryparser(allow, deny, default_op, default_allow, 
1233                                         default_deny) 
1234          return self._query_parse_with_fallback(qp, string) 
 1235   
1237          """A query for a single field. 
1238   
1239          """ 
1240          if self._index is None: 
1241              raise _errors.SearchError("SearchConnection has been closed") 
1242          try: 
1243              actions = self._field_actions[field]._actions 
1244          except KeyError: 
1245              actions = {} 
1246   
1247           
1248          for action, kwargslist in actions.iteritems(): 
1249              if action in (FieldActions.INDEX_EXACT, 
1250                            FieldActions.TAG, 
1251                            FieldActions.FACET,): 
1252                  prefix = self._field_mappings.get_prefix(field) 
1253                  if len(value) > 0: 
1254                      chval = ord(value[0]) 
1255                      if chval >= ord('A') and chval <= ord('Z'): 
1256                          prefix = prefix + ':' 
1257                  return _log(_xapian.Query, prefix + value) 
1258              if action == FieldActions.INDEX_FREETEXT: 
1259                  qp = _log(_xapian.QueryParser) 
1260                  qp.set_default_op(default_op) 
1261                  prefix = self._field_mappings.get_prefix(field) 
1262                  for kwargs in kwargslist: 
1263                      try: 
1264                          lang = kwargs['language'] 
1265                          qp.set_stemmer(_log(_xapian.Stem, lang)) 
1266                          qp.set_stemming_strategy(qp.STEM_SOME) 
1267                      except KeyError: 
1268                          pass 
1269                  return self._query_parse_with_fallback(qp, value, prefix) 
1270   
1271          return _log(_xapian.Query) 
 1272   
1273 -    def query_similar(self, ids, allow=None, deny=None, simterms=10): 
 1274          """Get a query which returns documents which are similar to others. 
1275   
1276          The list of document IDs to base the similarity search on is given in 
1277          `ids`.  This should be an iterable, holding a list of strings.  If 
1278          any of the supplied IDs cannot be found in the database, they will be 
1279          ignored.  (If no IDs can be found in the database, the resulting query 
1280          will not match any documents.) 
1281   
1282          By default, all fields which have been indexed for freetext searching 
1283          will be used for the similarity calculation.  The list of fields used 
1284          for this can be customised using the `allow` and `deny` parameters 
1285          (only one of which may be specified): 
1286   
1287          - `allow`: A list of fields to base the similarity calculation on. 
1288          - `deny`: A list of fields not to base the similarity calculation on. 
1289          - `simterms`: Number of terms to use for the similarity calculation. 
1290   
1291          For convenience, any of `ids`, `allow`, or `deny` may be strings, which 
1292          will be treated the same as a list of length 1. 
1293   
1294          Regardless of the setting of `allow` and `deny`, only fields which have 
1295          been indexed for freetext searching will be used for the similarity 
1296          measure - all other fields will always be ignored for this purpose. 
1297   
1298          """ 
1299          eterms, prefixes = self._get_eterms(ids, allow, deny, simterms) 
1300   
1301           
1302           
1303          q = _log(_xapian.Query, _xapian.Query.OP_ELITE_SET, eterms, simterms) 
1304          return q 
 1305   
1307          """Get a set of "significant" terms for a document, or documents. 
1308   
1309          This has a similar interface to query_similar(): it takes a list of 
1310          ids, and an optional specification of a set of fields to consider. 
1311          Instead of returning a query, it returns a list of terms from the 
1312          document (or documents), which appear "significant".  Roughly, 
1313          in this situation significant means that the terms occur more 
1314          frequently in the specified document than in the rest of the corpus. 
1315   
1316          The list is in decreasing order of "significance". 
1317   
1318          By default, all terms related to fields which have been indexed for 
1319          freetext searching will be considered for the list of significant 
1320          terms.  The list of fields used for this can be customised using the 
1321          `allow` and `deny` parameters (only one of which may be specified): 
1322   
1323          - `allow`: A list of fields to consider. 
1324          - `deny`: A list of fields not to consider. 
1325   
1326          For convenience, any of `ids`, `allow`, or `deny` may be strings, which 
1327          will be treated the same as a list of length 1. 
1328   
1329          Regardless of the setting of `allow` and `deny`, only fields which have 
1330          been indexed for freetext searching will be considered - all other 
1331          fields will always be ignored for this purpose. 
1332   
1333          The maximum number of terms to return may be specified by the maxterms 
1334          parameter. 
1335   
1336          """ 
1337          eterms, prefixes = self._get_eterms(ids, allow, deny, maxterms) 
1338          terms = [] 
1339          for term in eterms: 
1340              pos = 0 
1341              for char in term: 
1342                  if not char.isupper(): 
1343                      break 
1344                  pos += 1 
1345              field = prefixes[term[:pos]] 
1346              value = term[pos:] 
1347              terms.append((field, value)) 
1348          return terms 
 1349   
1351          """Get a set of terms for an expand 
1352   
1353          """ 
1354          if self._index is None: 
1355              raise _errors.SearchError("SearchConnection has been closed") 
1356          if allow is not None and deny is not None: 
1357              raise _errors.SearchError("Cannot specify both `allow` and `deny`") 
1358   
1359          if isinstance(ids, basestring): 
1360              ids = (ids, ) 
1361          if isinstance(allow, basestring): 
1362              allow = (allow, ) 
1363          if isinstance(deny, basestring): 
1364              deny = (deny, ) 
1365   
1366           
1367          if allow is None: 
1368              allow = [key for key in self._field_actions] 
1369          if deny is not None: 
1370              allow = [key for key in allow if key not in deny] 
1371   
1372           
1373          prefixes = {} 
1374          for field in allow: 
1375              try: 
1376                  actions = self._field_actions[field]._actions 
1377              except KeyError: 
1378                  actions = {} 
1379              for action, kwargslist in actions.iteritems(): 
1380                  if action == FieldActions.INDEX_FREETEXT: 
1381                      prefixes[self._field_mappings.get_prefix(field)] = field 
1382   
1383           
1384          while True: 
1385              try: 
1386                  eterms = self._perform_expand(ids, prefixes, simterms) 
1387                  break; 
1388              except _xapian.DatabaseModifiedError, e: 
1389                  self.reopen() 
1390          return eterms, prefixes 
 1391   
1396   
1398              pos = 0 
1399              for char in term: 
1400                  if not char.isupper(): 
1401                      break 
1402                  pos += 1 
1403              if term[:pos] in self._prefixes: 
1404                  return True 
1405              return False 
  1406   
1431   
1433          """A query which matches all the documents in the database. 
1434   
1435          """ 
1436          return _log(_xapian.Query, '') 
 1437   
1439          """A query which matches no documents in the database. 
1440   
1441          This may be useful as a placeholder in various situations. 
1442   
1443          """ 
1444          return _log(_xapian.Query) 
 1445   
1446 -    def spell_correct(self, querystr, allow=None, deny=None, default_op=OP_AND, 
1447                        default_allow=None, default_deny=None): 
 1448          """Correct a query spelling. 
1449   
1450          This returns a version of the query string with any misspelt words 
1451          corrected. 
1452   
1453          - `allow`: A list of fields to allow in the query. 
1454          - `deny`: A list of fields not to allow in the query. 
1455          - `default_op`: The default operator to combine query terms with. 
1456          - `default_allow`: A list of fields to search for by default. 
1457          - `default_deny`: A list of fields not to search for by default. 
1458   
1459          Only one of `allow` and `deny` may be specified. 
1460   
1461          Only one of `default_allow` and `default_deny` may be specified. 
1462   
1463          If any of the entries in `allow` are not present in the configuration 
1464          for the database, or are not specified for indexing (either as 
1465          INDEX_EXACT or INDEX_FREETEXT), they will be ignored.  If any of the 
1466          entries in `deny` are not present in the configuration for the 
1467          database, they will be ignored. 
1468   
1469          Note that it is possible that the resulting spell-corrected query will 
1470          still match no documents - the user should usually check that some 
1471          documents are matched by the corrected query before suggesting it to 
1472          users. 
1473   
1474          """ 
1475          qp = self._prepare_queryparser(allow, deny, default_op, default_allow, 
1476                                         default_deny) 
1477          try: 
1478              qp.parse_query(querystr, 
1479                             self._qp_flags_base | 
1480                             self._qp_flags_phrase | 
1481                             self._qp_flags_synonym | 
1482                             self._qp_flags_bool | 
1483                             qp.FLAG_SPELLING_CORRECTION) 
1484          except _xapian.QueryParserError: 
1485              qp.parse_query(querystr, 
1486                             self._qp_flags_base | 
1487                             self._qp_flags_phrase | 
1488                             self._qp_flags_synonym | 
1489                             qp.FLAG_SPELLING_CORRECTION) 
1490          corrected = qp.get_corrected_query_string() 
1491          if len(corrected) == 0: 
1492              if isinstance(querystr, unicode): 
1493                   
1494                   
1495                  return querystr.encode('utf-8') 
1496              return querystr 
1497          return corrected 
 1498   
1500          """Check if this database supports collapsing on a specified field. 
1501   
1502          """ 
1503          if self._index is None: 
1504              raise _errors.SearchError("SearchConnection has been closed") 
1505          try: 
1506              self._field_mappings.get_slot(field, 'collsort') 
1507          except KeyError: 
1508              return False 
1509          return True 
 1510   
1512          """Check if this database supports sorting on a specified field. 
1513   
1514          """ 
1515          if self._index is None: 
1516              raise _errors.SearchError("SearchConnection has been closed") 
1517          try: 
1518              self._field_mappings.get_slot(field, 'collsort') 
1519          except KeyError: 
1520              return False 
1521          return True 
 1522           
1524          """Get the prefix of a term. 
1525      
1526          Prefixes are any initial capital letters, with the exception that R always 
1527          ends a prefix, even if followed by capital letters. 
1528           
1529          """ 
1530          for p in xrange(len(term)): 
1531              if term[p].islower(): 
1532                  return term[:p] 
1533              elif term[p] == 'R': 
1534                  return term[:p+1] 
1535          return term 
 1536   
1538          """Check if a facet must never be returned by a particular query type. 
1539   
1540          Returns True if the facet must never be returned. 
1541   
1542          Returns False if the facet may be returned - either becuase there is no 
1543          entry for the query type, or because the entry is not 
1544          FacetQueryType_Never. 
1545   
1546          """ 
1547          if query_type is None: 
1548              return False 
1549          if query_type not in self._facet_query_table: 
1550              return False 
1551          if facet not in self._facet_query_table[query_type]: 
1552              return False 
1553          return self._facet_query_table[query_type][facet] == _indexerconnection.IndexerConnection.FacetQueryType_Never 
 1554   
1555 -    def search(self, query, startrank, endrank, 
1556                 checkatleast=0, sortby=None, collapse=None, 
1557                 gettags=None, 
1558                 getfacets=None, allowfacets=None, denyfacets=None, usesubfacets=None, 
1559                 percentcutoff=None, weightcutoff=None, 
1560                 query_type=None): 
 1561          """Perform a search, for documents matching a query. 
1562   
1563          - `query` is the query to perform. 
1564          - `startrank` is the rank of the start of the range of matching 
1565            documents to return (ie, the result with this rank will be returned). 
1566            ranks start at 0, which represents the "best" matching document. 
1567          - `endrank` is the rank at the end of the range of matching documents 
1568            to return.  This is exclusive, so the result with this rank will not 
1569            be returned. 
1570          - `checkatleast` is the minimum number of results to check for: the 
1571            estimate of the total number of matches will always be exact if 
1572            the number of matches is less than `checkatleast`.  A value of ``-1`` 
1573            can be specified for the checkatleast parameter - this has the 
1574            special meaning of "check all matches", and is equivalent to passing 
1575            the result of get_doccount(). 
1576          - `sortby` is the name of a field to sort by.  It may be preceded by a 
1577            '+' or a '-' to indicate ascending or descending order 
1578            (respectively).  If the first character is neither '+' or '-', the 
1579            sort will be in ascending order. 
1580          - `collapse` is the name of a field to collapse the result documents 
1581            on.  If this is specified, there will be at most one result in the 
1582            result set for each value of the field. 
1583          - `gettags` is the name of a field to count tag occurrences in, or a 
1584            list of fields to do so. 
1585          - `getfacets` is a boolean - if True, the matching documents will be 
1586            examined to build up a list of the facet values contained in them. 
1587          - `allowfacets` is a list of the fieldnames of facets to consider. 
1588          - `denyfacets` is a list of fieldnames of facets which will not be 
1589            considered. 
1590          - `usesubfacets` is a boolean - if True, only top-level facets and 
1591            subfacets of facets appearing in the query are considered (taking 
1592            precedence over `allowfacets` and `denyfacets`). 
1593          - `percentcutoff` is the minimum percentage a result must have to be 
1594            returned. 
1595          - `weightcutoff` is the minimum weight a result must have to be 
1596            returned. 
1597          - `query_type` is a value indicating the type of query being 
1598            performed. If not None, the value is used to influence which facets 
1599            are be returned by the get_suggested_facets() function. If the 
1600            value of `getfacets` is False, it has no effect. 
1601   
1602          If neither 'allowfacets' or 'denyfacets' is specified, all fields 
1603          holding facets will be considered (but see 'usesubfacets'). 
1604   
1605          """ 
1606          if self._index is None: 
1607              raise _errors.SearchError("SearchConnection has been closed") 
1608          if 'facets' in _checkxapian.missing_features: 
1609              if getfacets is not None or \ 
1610                 allowfacets is not None or \ 
1611                 denyfacets is not None or \ 
1612                 usesubfacets is not None or \ 
1613                 query_type is not None: 
1614                  raise errors.SearchError("Facets unsupported with this release of xapian") 
1615          if 'tags' in _checkxapian.missing_features: 
1616              if gettags is not None: 
1617                  raise errors.SearchError("Tags unsupported with this release of xapian") 
1618          if checkatleast == -1: 
1619              checkatleast = self._index.get_doccount() 
1620   
1621          enq = _log(_xapian.Enquire, self._index) 
1622          enq.set_query(query) 
1623   
1624          if sortby is not None: 
1625              asc = True 
1626              if sortby[0] == '-': 
1627                  asc = False 
1628                  sortby = sortby[1:] 
1629              elif sortby[0] == '+': 
1630                  sortby = sortby[1:] 
1631   
1632              try: 
1633                  slotnum = self._field_mappings.get_slot(sortby, 'collsort') 
1634              except KeyError: 
1635                  raise _errors.SearchError("Field %r was not indexed for sorting" % sortby) 
1636   
1637               
1638               
1639               
1640               
1641              enq.set_sort_by_value_then_relevance(slotnum, not asc) 
1642   
1643          if collapse is not None: 
1644              try: 
1645                  slotnum = self._field_mappings.get_slot(collapse, 'collsort') 
1646              except KeyError: 
1647                  raise _errors.SearchError("Field %r was not indexed for collapsing" % collapse) 
1648              enq.set_collapse_key(slotnum) 
1649   
1650          maxitems = max(endrank - startrank, 0) 
1651           
1652           
1653          checkatleast = max(checkatleast, endrank + 1) 
1654   
1655           
1656          matchspies = [] 
1657   
1658           
1659          if isinstance(gettags, basestring): 
1660              if len(gettags) != 0: 
1661                  gettags = [gettags] 
1662          tagspy = None 
1663          if gettags is not None and len(gettags) != 0: 
1664              tagspy = _log(_xapian.TermCountMatchSpy) 
1665              for field in gettags: 
1666                  try: 
1667                      prefix = self._field_mappings.get_prefix(field) 
1668                      tagspy.add_prefix(prefix) 
1669                  except KeyError: 
1670                      raise _errors.SearchError("Field %r was not indexed for tagging" % field) 
1671              matchspies.append(tagspy) 
1672   
1673   
1674           
1675          facetspy = None 
1676          facetfields = [] 
1677          if getfacets: 
1678              if allowfacets is not None and denyfacets is not None: 
1679                  raise _errors.SearchError("Cannot specify both `allowfacets` and `denyfacets`") 
1680              if allowfacets is None: 
1681                  allowfacets = [key for key in self._field_actions] 
1682              if denyfacets is not None: 
1683                  allowfacets = [key for key in allowfacets if key not in denyfacets] 
1684   
1685               
1686               
1687               
1688              queryfacets = set([None]) 
1689              if usesubfacets: 
1690                   
1691                  termsiter = query.get_terms_begin() 
1692                  termsend = query.get_terms_end() 
1693                  while termsiter != termsend: 
1694                      prefix = self._get_prefix_from_term(termsiter.get_term()) 
1695                      field = self._field_mappings.get_fieldname_from_prefix(prefix) 
1696                      if field and FieldActions.FACET in self._field_actions[field]._actions: 
1697                          queryfacets.add(field) 
1698                      termsiter.next() 
1699   
1700              for field in allowfacets: 
1701                  try: 
1702                      actions = self._field_actions[field]._actions 
1703                  except KeyError: 
1704                      actions = {} 
1705                  for action, kwargslist in actions.iteritems(): 
1706                      if action == FieldActions.FACET: 
1707                           
1708                           
1709                          if usesubfacets and self._facet_hierarchy.get(field) not in queryfacets: 
1710                              continue 
1711                           
1712                          if self._facet_query_never(field, query_type): 
1713                              continue 
1714                          slot = self._field_mappings.get_slot(field, 'facet') 
1715                          if facetspy is None: 
1716                              facetspy = _log(_xapian.CategorySelectMatchSpy) 
1717                          facettype = None 
1718                          for kwargs in kwargslist: 
1719                              facettype = kwargs.get('type', None) 
1720                              if facettype is not None: 
1721                                  break 
1722                          if facettype is None or facettype == 'string': 
1723                              facetspy.add_slot(slot, True) 
1724                          else: 
1725                              facetspy.add_slot(slot) 
1726                          facetfields.append((field, slot, kwargslist)) 
1727   
1728              if facetspy is None: 
1729                   
1730                   
1731                   
1732                   
1733                  facetspy = False 
1734              else: 
1735                  matchspies.append(facetspy) 
1736   
1737   
1738           
1739          if len(matchspies) == 0: 
1740              matchspy = None 
1741          elif len(matchspies) == 1: 
1742              matchspy = matchspies[0] 
1743          else: 
1744              matchspy = _log(_xapian.MultipleMatchDecider) 
1745              for spy in matchspies: 
1746                  matchspy.append(spy) 
1747   
1748          enq.set_docid_order(enq.DONT_CARE) 
1749   
1750           
1751          if percentcutoff is not None or weightcutoff is not None: 
1752              if percentcutoff is None: 
1753                  percentcutoff = 0 
1754              if weightcutoff is None: 
1755                  weightcutoff = 0 
1756              enq.set_cutoff(percentcutoff, weightcutoff) 
1757   
1758           
1759          while True: 
1760              try: 
1761                  if matchspy is None: 
1762                      mset = enq.get_mset(startrank, maxitems, checkatleast) 
1763                  else: 
1764                      mset = enq.get_mset(startrank, maxitems, checkatleast, 
1765                                          None, None, matchspy) 
1766                  break 
1767              except _xapian.DatabaseModifiedError, e: 
1768                  self.reopen() 
1769          facet_hierarchy = None 
1770          if usesubfacets: 
1771              facet_hierarchy = self._facet_hierarchy 
1772               
1773          return SearchResults(self, enq, query, mset, self._field_mappings, 
1774                               tagspy, gettags, facetspy, facetfields, 
1775                               facet_hierarchy, 
1776                               self._facet_query_table.get(query_type)) 
 1777   
1779          """Get an iterator which returns all the ids in the database. 
1780   
1781          The unqiue_ids are currently returned in binary lexicographical sort 
1782          order, but this should not be relied on. 
1783   
1784          Note that the iterator returned by this method may raise a 
1785          xapian.DatabaseModifiedError exception if modifications are committed 
1786          to the database while the iteration is in progress.  If this happens, 
1787          the search connection must be reopened (by calling reopen) and the 
1788          iteration restarted. 
1789   
1790          """ 
1791          if self._index is None: 
1792              raise _errors.SearchError("SearchConnection has been closed") 
1793          return _indexerconnection.PrefixedTermIter('Q', self._index.allterms()) 
 1794   
1796          """Get the document with the specified unique ID. 
1797   
1798          Raises a KeyError if there is no such document.  Otherwise, it returns 
1799          a ProcessedDocument. 
1800   
1801          """ 
1802          if self._index is None: 
1803              raise _errors.SearchError("SearchConnection has been closed") 
1804          while True: 
1805              try: 
1806                  postlist = self._index.postlist('Q' + id) 
1807                  try: 
1808                      plitem = postlist.next() 
1809                  except StopIteration: 
1810                       
1811                      raise KeyError('Unique ID %r not found' % id) 
1812                  try: 
1813                      postlist.next() 
1814                      raise _errors.IndexerError("Multiple documents "  
1815                                                 "found with same unique ID") 
1816                  except StopIteration: 
1817                       
1818                      pass 
1819   
1820                  result = ProcessedDocument(self._field_mappings) 
1821                  result.id = id 
1822                  result._doc = self._index.get_document(plitem.docid) 
1823                  return result 
1824              except _xapian.DatabaseModifiedError, e: 
1825                  self.reopen() 
 1826   
1828          """Get an iterator over the synonyms. 
1829   
1830           - `prefix`: if specified, only synonym keys with this prefix will be 
1831             returned. 
1832   
1833          The iterator returns 2-tuples, in which the first item is the key (ie, 
1834          a 2-tuple holding the term or terms which will be synonym expanded, 
1835          followed by the fieldname specified (or None if no fieldname)), and the 
1836          second item is a tuple of strings holding the synonyms for the first 
1837          item. 
1838   
1839          These return values are suitable for the dict() builtin, so you can 
1840          write things like: 
1841   
1842           >>> conn = _indexerconnection.IndexerConnection('foo') 
1843           >>> conn.add_synonym('foo', 'bar') 
1844           >>> conn.add_synonym('foo bar', 'baz') 
1845           >>> conn.add_synonym('foo bar', 'foo baz') 
1846           >>> conn.flush() 
1847           >>> conn = SearchConnection('foo') 
1848           >>> dict(conn.iter_synonyms()) 
1849           {('foo', None): ('bar',), ('foo bar', None): ('baz', 'foo baz')} 
1850   
1851          """ 
1852          if self._index is None: 
1853              raise _errors.SearchError("SearchConnection has been closed") 
1854          return _indexerconnection.SynonymIter(self._index, self._field_mappings, prefix) 
 1855   
 1870   
if __name__ == '__main__':
    # When run as a script, execute this module's doctests.
    import sys
    import doctest
    doctest.testmod(sys.modules[__name__])
1874