-
Notifications
You must be signed in to change notification settings - Fork 20
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
ZDM-522 Add size limit to PS Cache #99
base: main
Are you sure you want to change the base?
Changes from all commits
02789d4
9c73014
742a67c
42a20f4
7c0b69e
e292ad4
d54d19d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,33 +4,49 @@ import ( | |
"encoding/hex" | ||
"fmt" | ||
"github.com/datastax/go-cassandra-native-protocol/message" | ||
lru "github.com/hashicorp/golang-lru" | ||
log "github.com/sirupsen/logrus" | ||
"sync" | ||
) | ||
|
||
type PreparedStatementCache struct { | ||
cache map[string]PreparedData // Map containing the prepared queries (raw bytes) keyed on prepareId | ||
index map[string]string // Map that can be used as an index to look up origin prepareIds by target prepareId | ||
cache *lru.Cache // Map containing the prepared queries (raw bytes) keyed on prepareId | ||
index *lru.Cache // Map that can be used as an index to look up origin prepareIds by target prepareId | ||
|
||
interceptedCache map[string]PreparedData // Map containing the prepared queries for intercepted requests | ||
interceptedCache *lru.Cache // Map containing the prepared queries for intercepted requests | ||
|
||
lock *sync.RWMutex | ||
} | ||
|
||
func NewPreparedStatementCache() *PreparedStatementCache { | ||
func NewPreparedStatementCache(maxSize int) (*PreparedStatementCache, error) { | ||
cache, err := lru.New(maxSize) | ||
if err != nil { | ||
return nil, fmt.Errorf("error initializing the PreparedStatementCache cache map: %v", err) | ||
} | ||
|
||
index, err := lru.New(maxSize) | ||
if err != nil { | ||
return nil, fmt.Errorf("error initializing the PreparedStatementCache index map: %v", err) | ||
} | ||
|
||
interceptedCache, err := lru.New(maxSize) | ||
if err != nil { | ||
return nil, fmt.Errorf("error initializing the PreparedStatementCache interceptedCache map: %v", err) | ||
} | ||
|
||
return &PreparedStatementCache{ | ||
cache: make(map[string]PreparedData), | ||
index: make(map[string]string), | ||
interceptedCache: make(map[string]PreparedData), | ||
cache: cache, | ||
index: index, | ||
interceptedCache: interceptedCache, | ||
lock: &sync.RWMutex{}, | ||
} | ||
}, nil | ||
} | ||
|
||
func (psc PreparedStatementCache) GetPreparedStatementCacheSize() float64 { | ||
psc.lock.RLock() | ||
defer psc.lock.RUnlock() | ||
|
||
return float64(len(psc.cache) + len(psc.interceptedCache)) | ||
return float64(psc.cache.Len() + psc.interceptedCache.Len()) | ||
} | ||
|
||
func (psc *PreparedStatementCache) Store( | ||
|
@@ -42,8 +58,8 @@ func (psc *PreparedStatementCache) Store( | |
psc.lock.Lock() | ||
defer psc.lock.Unlock() | ||
|
||
psc.cache[originPrepareIdStr] = NewPreparedData(originPreparedResult, targetPreparedResult, prepareRequestInfo) | ||
psc.index[targetPrepareIdStr] = originPrepareIdStr | ||
psc.cache.Add(originPrepareIdStr, NewPreparedData(originPreparedResult, targetPreparedResult, prepareRequestInfo)) | ||
psc.index.Add(targetPrepareIdStr, originPrepareIdStr) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Hmm, what happens if the limit is reached and the key that is selected to be evicted on one cache is different from the key that was selected on the other cache? I'm not sure if bad behavior can happen if these two caches are out of sync or if it's fine... If we do need both caches to be in sync then a potential alternative would be to provide an eviction callback to the |
||
|
||
log.Debugf("Storing PS cache entry: {OriginPreparedId=%v, TargetPreparedId: %v, RequestInfo: %v}", | ||
hex.EncodeToString(originPreparedResult.PreparedQueryId), hex.EncodeToString(targetPreparedResult.PreparedQueryId), prepareRequestInfo) | ||
|
@@ -55,7 +71,7 @@ func (psc *PreparedStatementCache) StoreIntercepted(preparedResult *message.Prep | |
defer psc.lock.Unlock() | ||
|
||
preparedData := NewPreparedData(preparedResult, preparedResult, prepareRequestInfo) | ||
psc.interceptedCache[prepareIdStr] = preparedData | ||
psc.interceptedCache.Add(prepareIdStr, preparedData) | ||
|
||
log.Debugf("Storing intercepted PS cache entry: {PreparedId=%v, RequestInfo: %v}", | ||
hex.EncodeToString(preparedResult.PreparedQueryId), prepareRequestInfo) | ||
|
@@ -64,31 +80,37 @@ func (psc *PreparedStatementCache) StoreIntercepted(preparedResult *message.Prep | |
func (psc *PreparedStatementCache) Get(originPreparedId []byte) (PreparedData, bool) { | ||
psc.lock.RLock() | ||
defer psc.lock.RUnlock() | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We need to reevaluate these locks. This new cache implementation already does locking so it would be great if we could remove our locks. |
||
data, ok := psc.cache[string(originPreparedId)] | ||
if !ok { | ||
data, ok = psc.interceptedCache[string(originPreparedId)] | ||
data, ok := psc.cache.Get(string(originPreparedId)) | ||
if ok { | ||
return data.(PreparedData), true | ||
} | ||
return data, ok | ||
|
||
data, ok = psc.interceptedCache.Get(string(originPreparedId)) | ||
if ok { | ||
return data.(PreparedData), true | ||
} | ||
|
||
return nil, false | ||
} | ||
|
||
func (psc *PreparedStatementCache) GetByTargetPreparedId(targetPreparedId []byte) (PreparedData, bool) { | ||
psc.lock.RLock() | ||
defer psc.lock.RUnlock() | ||
|
||
originPreparedId, ok := psc.index[string(targetPreparedId)] | ||
originPreparedId, ok := psc.index.Get(string(targetPreparedId)) | ||
if !ok { | ||
// Don't bother attempting a lookup on the intercepted cache because this method should only be used to handle UNPREPARED responses | ||
return nil, false | ||
} | ||
|
||
data, ok := psc.cache[originPreparedId] | ||
data, ok := psc.cache.Get(originPreparedId) | ||
if !ok { | ||
log.Errorf("Could not get prepared data by target id even though there is an entry on the index map. "+ | ||
"This is most likely a bug. OriginPreparedId = %v, TargetPreparedId = %v", originPreparedId, targetPreparedId) | ||
return nil, false | ||
} | ||
|
||
return data, true | ||
return data.(PreparedData), true | ||
} | ||
|
||
type PreparedData interface { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we know what this limit is on the server side? We don't have to match it, but it would probably be good to know before we decide which limit to set as the default on the proxy.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The limit is based on size, rather than number of prepared statements:
The default calculated value is 1/256th of the heap or 10 MB, whichever is greater.
When I set the default to 5000, my intention was to keep the PS cache memory footprint relatively small, given that the proxy usually runs on instances with limited resources. It is quite unlikely for applications to create a large number of prepared statements if they are using them correctly, so even if a user has multiple applications using the proxy I thought that this should be a reasonable value. On the other hand, the footprint of each statement in the proxy PS cache maps is small, so choosing a default value that is a bit higher should also be fine.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So for a 16GB heap the limit would be about 60MB. I think it would be good to get an estimate of how much space an average prepared statement takes in our cache so we can come up with a good value for this limit instead of guessing blindly. 5000 sounds a bit too low, but without any data on the size that each statement takes I'm just guessing. Are there server metrics for the prepared cache size? We could use those metrics in a benchmark to get some data that would help us come up with a good limit.