Logo Search packages:      
Sourcecode: pymol version File versions  Download package

Word.c

/* 
A* -------------------------------------------------------------------
B* This file contains source code for the PyMOL computer program
C* copyright 1998-2000 by Warren Lyford Delano of DeLano Scientific. 
D* -------------------------------------------------------------------
E* It is unlawful to modify or remove this copyright notice.
F* -------------------------------------------------------------------
G* Please see the accompanying LICENSE file for further information. 
H* -------------------------------------------------------------------
I* Additional authors of this source file include:
-* 
-* 
-*
Z* -------------------------------------------------------------------
*/

#include"os_predef.h"
#include"os_std.h"

#include"Base.h"
#include"OOMac.h"
#include"Word.h"
#include"Parse.h"
#include"PyMOLObject.h"
#include"MemoryDebug.h"

struct _CWord {
  int no_state_at_present;
};

typedef struct {
  int match_mode; 
  int continued;
  int literal1, literal2; /* offsets into charVLA */
  int numeric1, numeric2;
  int has1, has2;
} MatchNode;

struct _CWordMatcher {
  PyMOLGlobals *G;
  MatchNode *node;
  int n_node;
  char *charVLA;
  int n_char;
  int ignore_case;
};

#define cMatchLiteral  0
#define cMatchNumericRange  cWordMatchOptionNumericRanges
#define cMatchAlphaRange  cWordMatchOptionAlphaRanges
#define cMatchWildcard 3
#if 0
static void WordMatcherDump(CWordMatcher *I)
{
  MatchNode *cur_node = I->node;
  register int n_node = I->n_node;
  while(n_node--) {
    printf("match_mode %d\n",cur_node->match_mode);
    printf("literal1 '%s' %d literal2 '%s' %d\n",
           I->charVLA+cur_node->literal1,
           cur_node->has1,
           I->charVLA+cur_node->literal2,
           cur_node->has2
           );
    cur_node++;
  }
}
#endif

#ifndef _PYMOL_INLINE
int WordCompare(PyMOLGlobals *G,char *p,char *q,int ignCase) 
/* all things equal, shorter is smaller */
{
  int result=0;
  register char cp,cq,tlp,tlq;
  if(ignCase) {
    while((cp=*p)&&(cq=*q))   {
      p++;
      q++;
      if(cp!=cq) {
        (tlp=tolower(cp));
      (tlq=tolower(cq));
      if(tlp<tlq) 
          return -1;
        else if(tlp>tlq) {
          return 1;
        }
      }
    }
  } else {
    while((cp=*p)&&(cq=*q))   {
      p++;
      q++;
      if(cp!=cq) {
        if(cp<cq) {
          return -1;
        } else if(cp>cq) {
          return 1;
        }
      }
    }
  }
  if((!result)&&(!*p)&&(*q))
    return -1;
  else if((!result)&&(*p)&&(!*q))
    return 1;
  return 0;
}
#endif

void WordMatchOptionsConfigInteger(CWordMatchOptions *I)
{
  I->range_mode = cWordMatchOptionNumericRanges;  
  I->lists = true;
  I->ignore_case = true;
  I->wildcard = 0; /* no wildcard for numbers */
  I->allow_hyphen = true;
  I->space_lists = false;
}

void WordMatchOptionsConfigAlpha(CWordMatchOptions *I, char wildcard, int ignore_case)
{
  I->range_mode = cWordMatchOptionAlphaRanges;
  I->lists = true;
  I->ignore_case = ignore_case;
  I->wildcard = wildcard;
  I->allow_hyphen = false;
  I->space_lists = false;
}

void WordMatchOptionsConfigMixed(CWordMatchOptions *I, char wildcard, int ignore_case )
{
  I->range_mode = cWordMatchOptionNumericRanges;  
  I->lists = true;
  I->ignore_case = ignore_case;
  I->wildcard = wildcard; 
  I->allow_hyphen = true;
  I->space_lists = false;
}

void WordMatchOptionsConfigNameList(CWordMatchOptions *I, char wildcard, int ignore_case)
{
  I->range_mode = cWordMatchOptionAlphaRanges;
  I->lists = true;
  I->ignore_case = ignore_case;
  I->wildcard = wildcard;
  I->allow_hyphen = false;
  I->space_lists = true;
}

CWordMatcher *WordMatcherNew(PyMOLGlobals *G, char *st, CWordMatchOptions *option,int force)
{
  
  CWordMatcher *result = NULL;
  int needed=force;
  char wildcard = option->wildcard;

  if(wildcard==32) wildcard = 0; /* space as wildcard means no wildcard */

  if(!st) 
    return NULL;
  { /* first determine if we need to incur the overhead of the matcher */
    int escape=false;
    char *p = st;
    while((*p)&&(!needed)) {
      if(!escape) {
        switch(*p) {
        case '\\':
          escape=true;
          needed=true;
          break;
        case ',': /* list operators */
        case '+':
          if(option->lists)
            needed=true;
          break;
        case '-': /* range operators */
          if(option->allow_hyphen)
            needed=true;
          break;
        case ':':
          if(option->range_mode)
            needed=true;
          break;
        case ' ':
          if(option->space_lists)
            needed=true;
          break;
        default:
          if(*p == wildcard)
            needed=true;
          break;
        }
      } else
        escape=false;
      p++;
    }
  }

  if(needed) { /* if so, then convert the expression into a match tree */
    register int n_char = 0;
    register int n_node = 0;
    OOCalloc(G,CWordMatcher);
    I->charVLA = VLACalloc(char,10); /* auto_zeroing...*/
    I->node = VLACalloc(MatchNode,10);
    I->ignore_case = option->ignore_case;
    I->G = G;
    /* build up the matcher structure... */
    {
      char *p = st,c,*q;
      int escape = false;
      int token_active = false;
      int node_active = false;
      int char_handled = false;
      int cur_node = 0;
      int expectation = 1;

      while(1) {
        c = *p;
        char_handled = false;
        if(!escape) {
          switch(c) {
          case '\\':
            escape=true;
            char_handled = true;
            break;
          case 0:
            if(option->lists) {
              char_handled = true;
              node_active = false;
              token_active = false;
            }
            break;
          case '+':
          case ',': /* list operators */
            if(option->lists) {
              if(n_node<expectation) { /* create empty node */
                VLACheck(I->node, MatchNode, n_node);  
                n_node++;
              } else {
                expectation = n_node+1;
              }
              char_handled = true;
              node_active = false;
              token_active = false;
            }
            break;
          case ' ': /* space list */
            if(option->space_lists) {
              if(n_node<expectation) { /* create empty node */
                VLACheck(I->node, MatchNode, n_node);  
                n_node++;
              } else {
                expectation = n_node+1;
              }
              char_handled = true;
              node_active = false;
              token_active = false;
            }
            break;
          case '-': /* range operators */
            if(option->allow_hyphen && option->range_mode) {
              if(!node_active) {
                cur_node = n_node;
                VLACheck(I->node, MatchNode, cur_node);
                node_active = true;
                n_node++;
              }
              I->node[cur_node].match_mode = option->range_mode;
              token_active = false;
              char_handled = true; 
            }
            break;
          case ':':
            if(option->range_mode) {
              if(!node_active) {
                cur_node = n_node;
                VLACheck(I->node, MatchNode, cur_node);
                node_active = true;
                n_node++;
              }
              I->node[cur_node].match_mode = option->range_mode;
              token_active = false;
              char_handled = true;  
            }
            break;
          default:
            if(c == wildcard) {
              if(node_active) {
                I->node[cur_node].continued = true;
              }
              VLACheck(I->node, MatchNode, n_node);                
              cur_node = n_node;
              I->node[cur_node].match_mode = cMatchWildcard;
              n_node++;
              node_active = true;
              token_active = false;
              char_handled = true;
            }
            break;
          }
        } else 
          escape = false;
        if(!char_handled) {
          if(!token_active) {
            n_char++;
            VLACheck(I->charVLA, char, n_char);
            token_active = true;
            
            if( (!node_active) || (I->node[cur_node].match_mode == cMatchWildcard) ) {
              if(node_active) /* must be extending after a wildcard */
                I->node[cur_node].continued = true;
              else
                node_active = true;
              VLACheck(I->node, MatchNode, n_node);
              cur_node = n_node;
              I->node[cur_node].literal1 = n_char; /* the first literal */
              n_node++;
            } else {
              I->node[cur_node].literal2 = n_char; /* must be the second literal */
            }
          }
               
          /* copy character into auto-terminated string */
          VLACheck(I->charVLA, char, n_char+1);
          q = I->charVLA + n_char;
          (*q++)=c;
          n_char++;
        }
        if(c) 
          p++;
        else
          break;
      }
      if(n_node<expectation) { /* create empty node */
        VLACheck(I->node, MatchNode, n_node);  
        n_node++;
      }
        
    }
    
    {
      int a;
      int tmp;
      MatchNode *node = I->node;
      for(a=0;a<n_node;a++) {
        switch(node->match_mode) {
        case cMatchLiteral:
          if(option->range_mode == cWordMatchOptionNumericRanges ) {
            if(node->literal1) {
              if(sscanf(I->charVLA+node->literal1,"%d",&tmp)==1) {
                node->numeric1 = tmp;
                node->has1=true;
              }
            }
          }
          break;
        case cMatchAlphaRange:
          if(node->literal1)
            node->has1=true;
          if(node->literal2)
            node->has2=true;

          break;
        case cMatchNumericRange:
          if(node->literal1) {
            if(sscanf(I->charVLA+node->literal1,"%d",&tmp)==1) {
              node->numeric1 = tmp;
              node->has1=true;
            }
          }
          if(node->literal2) {
            if(sscanf(I->charVLA+node->literal2,"%d",&tmp)==1) {
              node->numeric2 = tmp;
              node->has2=true;
            }
          }
          break;
        }
        node++;
      }
    }
    I->n_char = n_char;
    I->n_node = n_node;
    /*        WordMatcherDump(I);*/
    result = I;
  }
  return result;
}

static int recursive_match(CWordMatcher *I, MatchNode *cur_node, char *text, int *value_ptr)
{
  int ignore_case = I->ignore_case;
  switch(cur_node->match_mode) {
  case cMatchLiteral:
    {
      char *q = I->charVLA + cur_node->literal1;
      char *p = text;
      while((*p)&&(*q))
        {
          if(*p!=*q)
            {
              if(!ignore_case)
                return false;
              else if(tolower(*p)!=tolower(*q))
                return false;
            }
          p++;
          q++;
        }
      if((!(*q))&&(!(*p)))
        return true;

      if((*p)&&(!*q)&&cur_node->continued)
        return recursive_match(I, cur_node+1, p, value_ptr);
      if((*p)!=(*q))
        return false;
    }
    break;
  case cMatchWildcard:
    { 
      char *p;
      p = text;
      if(!cur_node->continued)
        return true;
      else {
        while(*p) {
          if(recursive_match(I, cur_node+1, p, value_ptr))
            return 1;
          p++;
        }
      }
    }
    break;
  case cMatchAlphaRange:
    {
      char *l1 = I->charVLA + cur_node->literal1;
      char *l2 = I->charVLA + cur_node->literal2;
      if(((!cur_node->has1) ||
          (WordCompare(I->G,l1,text,ignore_case)<=0)) &&
         ((!cur_node->has2) ||
          (WordCompare(I->G,l2,text,ignore_case)>=0)))
        return true;
      else
        return false;
    }
    break;
  case cMatchNumericRange:
    if(value_ptr) {
      int value = *value_ptr;
      
      if(((!cur_node->has1) ||
          (cur_node->numeric1<= value)) &&
         ((!cur_node->has2) ||
          (cur_node->numeric2>= value)))
        return true;
    } else {
      int value;
      if(sscanf(text,"%d",&value)==1)
        if(((!cur_node->has1) ||
            (cur_node->numeric1 <= value)) &&
           ((!cur_node->has2) ||
            (cur_node->numeric2 >= value)))
          return true;
    }
    break;
  }
  return false;
}


int WordMatcherMatchAlpha(CWordMatcher *I, char *text)
{
  register MatchNode *cur_node = I->node;
  register int n_node = I->n_node;
  
  while(n_node--) {
    if(recursive_match(I, cur_node, text, NULL))
      return true;
    else {
      while(cur_node->continued)
        cur_node++;
      cur_node++;
    }
  }
  return false;
}

int WordMatcherMatchMixed(CWordMatcher *I, char *text,int value)
{
  register MatchNode *cur_node = I->node;
  register int n_node = I->n_node;
  
  while(n_node--) {
    if(recursive_match(I, cur_node, text, &value))
      return true;
    else {
      while(cur_node->continued)
        cur_node++;
      cur_node++;
    }
  }
  return false;
}

static int integer_match(CWordMatcher *I, MatchNode *cur_node,int value)
{
  switch(cur_node->match_mode) {
  case cMatchLiteral:
    if((cur_node->has1) && (cur_node->numeric1==value))
      return true;
    break;
  case cMatchNumericRange:
    if(((!cur_node->has1) ||
        (cur_node->numeric1<= value)) &&
       ((!cur_node->has2) ||
        (cur_node->numeric2>= value)))
      return true;
    break;
  }
  return false;
}

int WordMatcherMatchInteger(CWordMatcher *I,int value)
{
  register MatchNode *cur_node = I->node;
  register int n_node = I->n_node;
  
  while(n_node--) {
    if(integer_match(I, cur_node, value))
      return true;
    else {
      while(cur_node->continued)
        cur_node++;
      cur_node++;
    }
  }
  return false;
}

void WordMatcherFree(CWordMatcher *I)
{
  if(I) {
    VLAFreeP(I->node);
    VLAFreeP(I->charVLA);
  }
  OOFreeP(I);
}

CWordList *WordListNew(PyMOLGlobals *G,char *st)
{
  int n_word = 0;
  char *p;
  int len = 0;
  OOCalloc(G,CWordList);

  if(I) {
    p=st;
    /* first, count how many words we have */
    while(*p) {
      if(*p>32) {
        n_word++;
        while((*p)>32) {
          len++;
          p++;
        }
        len++;
      } else
        p++;
    }
    /* allocate the storage we'll need to hold the words */
    {
      I->word = Alloc(char,len);
      I->start = Alloc(char*,n_word);
      
      /* and copy the words */
      
      if(I->word && I->start) {
        char *q = I->word;
        char **q_ptr = I->start;
        p = st;
        while(*p) {
          if(*p>32) {
            *(q_ptr++) = q;
            while((*p)>32) {
              *(q++) = *(p++);
            }
            *(q++) = 0;
            len++;
          } else 
            p++;
        }
        I->n_word = n_word;
      }
    }
  }
  return I;
}

void WordListFreeP(CWordList *I)
{
  if(I) {
    FreeP(I->word);
    FreeP(I->start);
    FreeP(I);
  }
}

void WordListDump(CWordList *I,char *prefix)
{
  if(I) {
    int a;
    printf(" %s: n_word %d\n",prefix,I->n_word);
    for(a=0;a<I->n_word;a++) {
      printf(" %s: word %d=[%s]\n",prefix,a,I->start[a]);
    }
  }
}

int WordListMatch(PyMOLGlobals *G,CWordList *I,char *name, int ignore_case)
{
  int result = -1;
  if(I) {
    int a;
    for(a=0;a<I->n_word;a++) {
      if(WordMatch(G,I->start[a],name,ignore_case)) {
        result = a;
        break;
      }
    }
  }
  return result;
}

int WordInit(PyMOLGlobals *G)
{
  register CWord *I= NULL;
  
  I = (G->Word = Calloc(CWord,1));
  if(I) {
    return 1;
  } else 
    return 0;

}

void WordFree(PyMOLGlobals *G)
{
  FreeP(G->Word);
}


void WordPrimeCommaMatch(PyMOLGlobals *G,char *p)
{ /* replace '+' with ',' */
  while(*p) { /* this should not be done here... */
    if(*p=='+')
      if(!((*(p+1)==0)||(*(p+1)==',')||(*(p+1)=='+')))
        *p=',';
    p++;
  }
}

int WordMatchExact(PyMOLGlobals *G,char *p,char *q,int ignCase) 

/* 0 = no match
   non-zero = perfect match  */

{
  while((*p)&&(*q))
       {
            if(*p!=*q)
              {
                   if(!ignCase)
            return 0;
          else if(tolower(*p)!=tolower(*q))
            return 0;
              }
            p++;
            q++;
       }
  if((*p)!=(*q))
    return 0;
  return 1;
}

int WordMatchNoWild(PyMOLGlobals *G,char *p,char *q,int ignCase) 
/* allows for p to match when shorter than q.

Returns:
0 = no match
positive = match out to N characters
negative = perfect match  */

{
  int i=1;
  while((*p)&&(*q))
       {
            if(*p!=*q)
              {
                    if(ignCase)
                        {
                          if(tolower(*p)!=tolower(*q))
                               {
                                    i=0;
                                    break;
                               }
                        }
                   else
                        {
                          i=0;
                          break;
                        }
              }
            i++;
            p++;
            q++;
       }
  if((*p)&&(!*q))
    i=0;
  if(i&&((!*p)&&(!*q))) /*exact match gives negative value */
       i=-i;
  return(i);
}

int WordMatch(PyMOLGlobals *G,char *p,char *q,int ignCase) 
/* allows for terminal wildcard (*) in p
 * and allows for p to match when shorter than q.

Returns:
0 = no match
positive = match out to N characters
negative = perfect/wildcard match  */

{
  int i=1;
  register char WILDCARD = '*';
  while((*p)&&(*q))
       {
            if(*p!=*q)
              {
                   if(*p==WILDCARD)
                        {
                          i=-i;
                          break;
                        }
                   if(ignCase)
                        {
                          if(tolower(*p)!=tolower(*q))
                               {
                                    i=0;
                                    break;
                               }
                        }
                   else
                        {
                          i=0;
                          break;
                        }
              }
            i++;
            p++;
            q++;
       }
  if((!*q)&&(*p==WILDCARD))
       i=-i;
  if(*p!=WILDCARD) {
       if((*p)&&(!*q))
            i=0;
  }
  if(i&&((!*p)&&(!*q))) /*exact match gives negative value */
       i=-i;
  return(i);
}

int WordMatchComma(PyMOLGlobals *G,char *pp,char *qq,int ignCase) 
     /* allows for comma list in p, also allows wildcards (*) in p */
{
  register char *p=pp, *q=qq;
  register int i=0;
  register char WILDCARD = '*';
  register char pc,qc;
  register int ic = ignCase;
  int best_i=0;
  char *q_copy;
  int blank;
  int trailing_comma=0;

  blank = (!*p);
  q_copy=q;
  while(((*p)||(blank))&&(best_i>=0)) {
    blank=0;
    i=1;
    q=q_copy;
    while((pc=(*p))&&(qc=(*q)))
      {
        if(pc==',')
          break;
        if(pc!=qc)
          {
            if(pc==WILDCARD)
              {
                i=-i;
                break;
              }
            if(ic)
              {
                if(tolower(pc)!=tolower(qc))
                  {
                    i=0;
                    break;
                  }
              }
            else 
              {
                i=0;
                break;
              }
          }
        p++;
        q++;
        i++;
      }
    if((!*q)&&((*p==WILDCARD)||(*p==',')))
      i=-i;
    if((*p!=WILDCARD)&&(*p!=','))
      if((*p)&&(!*q))
        i=0;
    if(i&&((!*p)&&(!*q))) /*exact match*/
      i=-i;

    if(i<0)
      best_i=i;
    else if((best_i>=0))
      if(i>best_i)
        best_i=i;
    if(best_i>=0) {
      while(*p) {
        if(*p==',')
          break;
        p++;
      }
      if(*p==',') { /* handle special case, trailing comma */
        if(*(p+1))
          p++;
        else if(!trailing_comma)
          trailing_comma = 1;
        else
          p++;
      }
    }
  }
  return(best_i);
}

int WordMatchCommaExact(PyMOLGlobals *G,char *p,char *q,int ignCase) 
/* allows for comma list in p, no wildcards */
{
  int i=0;
  int best_i=0;
  char *q_copy;
  int blank;
  int trailing_comma=0;

  /*  printf("match? [%s] [%s] ",p,q);*/

  blank = (!*p);
  q_copy=q;
  while(((*p)||(blank))&&(best_i>=0)) {
    blank=0;
    i=1;
    q=q_copy;
    while((*p)&&(*q))
      {
        if(*p==',')
          break;
        if(*p!=*q)
          {
            if(ignCase)
              {
                if(tolower(*p)!=tolower(*q))
                  {
                    i=0;
                    break;
                  }
              }
            else 
              {
                i=0;
                break;
              }
          }
        i++;
        p++;
        q++;
      }
    if((!*q)&&(*p==','))
      i=-i;
    if(*p!=',')
      if((*p)&&(!*q))
        i=0;
    if(i&&((!*p)&&(!*q))) /*exact match*/
      i=-i;

    if(i<0)
      best_i=i;
    else if((best_i>=0))
      if(i>best_i)
        best_i=i;
    if(best_i>=0) {
      while(*p) {
        if(*p==',')
          break;
        p++;
      }
      if(*p==',') { /* handle special case, trailing comma */
        if(*(p+1))
          p++;
        else if(!trailing_comma)
          trailing_comma = 1;
        else
          p++;
      }
    }
  }
  /*  printf("result: %d\n",best_i);*/

  return(best_i);
}

int WordMatchCommaInt(PyMOLGlobals *G,char *p,int number) 
{
  WordType buffer;
  sprintf(buffer,"%d",number);
  return(WordMatchComma(G,p,buffer,1));
}


int WordIndex(PyMOLGlobals *G,WordType *list,char *word,int minMatch,int ignCase)
{
  int c,i,mi,mc;
  int result = -1;
  c=0;
  mc=-1;
  mi=-1;
  while(list[c][0])
       {
            i=WordMatch(G,word,list[c],ignCase);
            if(i>0)
              {
                   if(mi<i)
                        {
                          mi=i;
                          mc=c;
                        }
              }
            else if(i<0)
              {
                   if((-i)<minMatch)
                        mi=minMatch+1; /*exact match always matches */
                   else
                        mi=(-i);
                   mc=c;
              }
            c++;
       }
  if((mi>minMatch))
       result=mc;
  return(result);  

}

int WordKey(PyMOLGlobals *G,WordKeyValue *list,char *word,int minMatch,int ignCase,int *exact)
{
  int c,i,mi,mc;
  int result = 0;
  c=0;
  mc=-1;
  mi=-1;
  *exact = false;
  while(list[c].word[0])
    {
      i=WordMatchNoWild(G,word,list[c].word,ignCase);
            if(i>0)
              {
                   if(mi<i)
                        {
                          mi=i;
                          mc=list[c].value;
                        }
        }
      else if(i<0)
              {
          *exact = true;
                   if((-i)<=minMatch) {
            mi=minMatch+1; /*exact match always matches */
          }
          else
            mi=(-i);
          mc=list[c].value;
              }
            c++;
       }
  if((mi>=minMatch))
    result=mc;
  return(result);  
}






Generated by  Doxygen 1.6.0   Back to index