// io.shiftleft.utils.StringInterner.scala (Maven / Gradle / Ivy)
package io.shiftleft.utils
import gnu.trove.map.hash.THashMap
/** Interface for deduplicating strings used by CPGLoader.
*
* The JVM's G1 collector can deduplicate strings by itself, but that is not guaranteed to happen. Interning
* manually while performing CPU-heavy calculations ensures that we won't run out of memory.
*/
trait StringInterner {
/** Maps `s` to the instance that should be retained in its place.
*
* The implementations in this file return either `s` itself or a previously seen string that is equal to it.
*
* @param s
*   String to deduplicate
* @return
*   The instance to keep a reference to instead of `s`
*/
def intern(s: String): String
}
/** Factory methods and defaults for [[StringInterner]] implementations. */
object StringInterner {

  /** Default length limit for interning: strings shorter than this are interned, longer or equal ones are not. */
  val DefaultMaxStringLength: Int = 1024

  /** Default initial size passed to the string table of a strong interner. */
  val DefaultInitialSize: Int = 64 * 1024

  /** Interner that performs no deduplication: every argument (including `null`) is returned unchanged. */
  val noop: StringInterner = new StringInterner {
    override def intern(s: String): String = s
  }

  /** Creates a string interner that will hold strong references to the interned objects; they won't be GC'ed until
    * the reference to the interner itself is released.
    *
    * The returned interner performs no synchronization of its own.
    * NOTE(review): presumably it is only used from a single thread at a time -- confirm against callers.
    *
    * @param maxStringLength
    *   Exclusive upper bound on the length of strings that are interned: only strings with
    *   `length < maxStringLength` are cached, anything longer (or of exactly this length) is returned as-is
    * @param initialSize
    *   Initial string table size
    * @return
    *   Instance of the StringInterner which uses strong references
    */
  def makeStrongInterner(
    maxStringLength: Int = DefaultMaxStringLength,
    initialSize: Int = DefaultInitialSize
  ): StringInterner = new StringInterner {
    private val stringCache = new THashMap[String, String](initialSize)

    override def intern(s: String): String =
      if (s == null || s.length >= maxStringLength) {
        // `null` is passed through (same as `noop`) instead of failing on `s.length`;
        // over-long strings are deliberately never cached.
        s
      } else {
        // One table probe instead of get-then-put: `putIfAbsent` returns the instance that is
        // already cached, or `null` when `s` has just been inserted as the canonical instance.
        // A plain null check is used here to avoid allocating an Option on every call.
        val alreadyCached = stringCache.putIfAbsent(s, s)
        if (alreadyCached == null) s else alreadyCached
      }
  }
}