All Downloads are FREE. Search and download functionalities are using the official Maven repository.

edu.stanford.nlp.ie.AcquisitionsPrior Maven / Gradle / Ivy

package edu.stanford.nlp.ie;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.Index;
import edu.stanford.nlp.util.StringUtils;

/**
 * Sequence prior that scores the entities of a labelling whose types are
 * {@code purchaser}, {@code purchabr}, {@code seller}, {@code sellerabr},
 * {@code acquired} and {@code acqabr}. The {@code *abr} types are presumably
 * abbreviated mentions of the corresponding full-name type (the scoring below
 * treats them that way) -- confirm against the data set documentation.
 *
 * <p>The score starts at zero and only ever has penalties subtracted from it;
 * every penalty is weighted by the number of words in the offending entity.
 *
 * @author Jenny Finkel
 */

public class AcquisitionsPrior extends EntityCachingAbstractSequencePrior {

  /**
   * Per-word penalty for a full-name entity, charged (a) when its role has more
   * than one distinct phrase and (b) when abbreviations exist for its role but
   * none of them is a substring of it.
   */
  double penalty = 4.0;
  /**
   * Per-word penalty for an abbreviation that is a substring of a full name
   * belonging to one of the two other roles.
   */
  double penalty1 = 3.0;
  /**
   * Per-word penalty for an abbreviation that is not a substring of any full
   * name of its own role.
   */
  double penalty2 = 4.0;

  /**
   * Forwards all three arguments to the superclass constructor unchanged.
   *
   * <p>NOTE(review): {@code Index} and {@code List} are raw types here; their
   * type arguments may have been lost. Confirm against the superclass
   * constructor before parameterizing them.
   *
   * @param backgroundSymbol passed through to the superclass
   * @param classIndex passed through to the superclass
   * @param doc passed through to the superclass
   */
  public AcquisitionsPrior(String backgroundSymbol, Index classIndex, List doc) {
    super(backgroundSymbol, classIndex, doc);
  }

  /**
   * Computes the prior score from the inherited {@code entities} array.
   *
   * <p>The {@code sequence} argument is not read by this method; presumably the
   * superclass keeps {@code entities} in sync with the current labelling --
   * confirm there. Each entity is counted once per contiguous run of positions
   * that reference the same {@code Entity} object.
   *
   * @param sequence label sequence (unused here)
   * @return zero minus all accumulated penalties
   * @throws IllegalStateException if an entity's type name is not one of the six
   *     known types (previously this terminated the JVM with exit status 0)
   */
  public double scoreOf(int[] sequence) {
    Mentions purchasers = new Mentions();
    Mentions purchabrs = new Mentions();
    Mentions sellers = new Mentions();
    Mentions sellerabrs = new Mentions();
    Mentions acquireds = new Mentions();
    Mentions acqabrs = new Mentions();

    for (int i = 0; i < entities.length; i++) {
      Entity entity = entities[i];
      // Skip empty positions and positions that continue the previous entity.
      if (entity == null || (i > 0 && entities[i - 1] == entity)) {
        continue;
      }

      String type = classIndex.get(entity.type);
      Mentions bucket;
      // Constant-first comparison so a null type reaches the error branch
      // instead of raising a NullPointerException.
      if ("purchaser".equals(type)) {
        bucket = purchasers;
      } else if ("purchabr".equals(type)) {
        bucket = purchabrs;
      } else if ("seller".equals(type)) {
        bucket = sellers;
      } else if ("sellerabr".equals(type)) {
        bucket = sellerabrs;
      } else if ("acquired".equals(type)) {
        bucket = acquireds;
      } else if ("acqabr".equals(type)) {
        bucket = acqabrs;
      } else {
        // Fail loudly rather than exiting the whole JVM with a success status.
        throw new IllegalStateException("unknown entity type: " + type);
      }
      bucket.add(entity);
    }

    // The running total is threaded through the helpers so that penalties are
    // subtracted in the same order as before.
    double p = 0.0;
    p = penalizeFullNames(p, purchasers, purchabrs);
    p = penalizeFullNames(p, sellers, sellerabrs);
    p = penalizeFullNames(p, acquireds, acqabrs);

    p = penalizeAbbreviations(p, purchabrs, purchasers, acquireds, sellers);
    p = penalizeAbbreviations(p, sellerabrs, sellers, acquireds, purchasers);
    p = penalizeAbbreviations(p, acqabrs, acquireds, sellers, purchasers);
    return p;
  }

  /**
   * Applies the full-name penalties for one role and returns the updated score.
   *
   * @param p running score
   * @param fulls full-name entities of the role
   * @param abbrs abbreviation entities of the same role
   */
  private double penalizeFullNames(double p, Mentions fulls, Mentions abbrs) {
    boolean severalDistinctPhrases = fulls.phrases.size() > 1;
    boolean abbreviationsExist = !abbrs.phrases.isEmpty();
    for (int i = 0; i < fulls.mentions.size(); i++) {
      int wordCount = fulls.mentions.get(i).words.size();
      if (severalDistinctPhrases) {
        p -= wordCount * penalty;
      }
      if (abbreviationsExist && !abbrs.anyContainedIn(fulls.squashed.get(i))) {
        p -= wordCount * penalty;
      }
    }
    return p;
  }

  /**
   * Applies the abbreviation penalties for one role and returns the updated score.
   *
   * @param p running score
   * @param abbrs abbreviation entities of the role
   * @param ownFulls full-name entities of the same role
   * @param otherFullsA full-name entities of one of the other roles
   * @param otherFullsB full-name entities of the remaining role
   */
  private double penalizeAbbreviations(double p, Mentions abbrs, Mentions ownFulls,
                                       Mentions otherFullsA, Mentions otherFullsB) {
    for (int i = 0; i < abbrs.mentions.size(); i++) {
      int wordCount = abbrs.mentions.get(i).words.size();
      String abbr = abbrs.squashed.get(i);
      if (!ownFulls.anyContains(abbr)) {
        p -= wordCount * penalty2;
      }
      if (otherFullsA.anyContains(abbr) || otherFullsB.anyContains(abbr)) {
        p -= wordCount * penalty1;
      }
    }
    return p;
  }

  /**
   * The entities collected for a single type, with their text forms computed
   * once at insertion time instead of inside every comparison loop.
   */
  private static final class Mentions {
    /** Entities in order of appearance. */
    final List<Entity> mentions = new ArrayList<Entity>();
    /** Lower-cased words joined without a separator; parallel to {@link #mentions}. */
    final List<String> squashed = new ArrayList<String>();
    /** Distinct lower-cased phrases (words joined by a single space). */
    final Set<String> phrases = new HashSet<String>();

    /** Records an entity together with both of its text forms. */
    void add(Entity entity) {
      mentions.add(entity);
      squashed.add(StringUtils.join(entity.words, "").toLowerCase());
      phrases.add(StringUtils.join(entity.words, " ").toLowerCase());
    }

    /** Returns true if {@code fragment} is a substring of any squashed mention held here. */
    boolean anyContains(String fragment) {
      for (String text : squashed) {
        if (text.indexOf(fragment) >= 0) {
          return true;
        }
      }
      return false;
    }

    /** Returns true if any squashed mention held here is a substring of {@code text}. */
    boolean anyContainedIn(String text) {
      for (String fragment : squashed) {
        if (text.indexOf(fragment) >= 0) {
          return true;
        }
      }
      return false;
    }
  }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy