@@ -106,10 +106,10 @@ def _get_surrounding_secret_lines(
106106 end_line = min (secret_line + surrounding_lines , len (lines ))
107107 return "\n " .join (lines [start_line :end_line ])
108108
109- def obfuscate (self , text : str ) -> tuple [str , int ]:
109+ def obfuscate (self , text : str ) -> tuple [str , List [ Match ] ]:
110110 matches = CodegateSignatures .find_in_string (text )
111111 if not matches :
112- return text , 0
112+ return text , []
113113
114114 logger .debug (f"Found { len (matches )} secrets in the user message" )
115115
@@ -133,16 +133,16 @@ def obfuscate(self, text: str) -> tuple[str, int]:
133133 protected_text = list (text )
134134
135135 # Store matches for logging
136- found_secrets = 0
136+ found_secrets = []
137137
138138 # First pass. Replace each match with its encrypted value
139- logger .info ("\n Found secrets:" )
139+ logger .info (f "\n Found { len ( absolute_matches ) } secrets:" )
140140 for start , end , match in absolute_matches :
141141 hidden_secret = self ._hide_secret (match )
142142
143143 # Replace the secret in the text
144144 protected_text [start :end ] = hidden_secret
145- found_secrets += 1
145+ found_secrets . append ( match )
146146 # Log the findings
147147 logger .info (
148148 f"\n Service: { match .service } "
@@ -228,7 +228,7 @@ def name(self) -> str:
228228
229229 def _redact_text (
230230 self , text : str , secrets_manager : SecretsManager , session_id : str , context : PipelineContext
231- ) -> tuple [str , int ]:
231+ ) -> tuple [str , List [ Match ] ]:
232232 """
233233 Find and encrypt secrets in the given text.
234234
@@ -269,7 +269,7 @@ async def process(
269269 raise ValueError ("Session ID not found in context" )
270270
271271 new_request = request .copy ()
272- total_redacted = 0
272+ total_matches = []
273273
274274 # Process all messages
275275 last_assistant_idx = - 1
@@ -281,15 +281,18 @@ async def process(
281281 for i , message in enumerate (new_request ["messages" ]):
282282 if "content" in message and message ["content" ]:
283283 # Protect the text
284- protected_string , redacted_count = self ._redact_text (
284+ protected_string , secrets_matched = self ._redact_text (
285285 str (message ["content" ]), secrets_manager , session_id , context
286286 )
287287 new_request ["messages" ][i ]["content" ] = protected_string
288288
289- # Sum redacted count for messages after the last assistant message
289+ # Append the matches for messages after the last assistant message
290290 if i > last_assistant_idx :
291- total_redacted += redacted_count
291+ total_matches += secrets_matched
292292
293+ # Not count repeated secret matches
294+ set_secrets_value = set (match .value for match in total_matches )
295+ total_redacted = len (set_secrets_value )
293296 context .secrets_found = total_redacted > 0
294297 logger .info (f"Total secrets redacted since last assistant message: { total_redacted } " )
295298
@@ -362,7 +365,6 @@ async def process_chunk(
362365 if match :
363366 # Found a complete marker, process it
364367 encrypted_value = match .group (1 )
365- print ("----> encrypted_value: " , encrypted_value )
366368 original_value = input_context .sensitive .manager .get_original_value (
367369 encrypted_value ,
368370 input_context .sensitive .session_id ,
@@ -371,8 +373,6 @@ async def process_chunk(
371373 if original_value is None :
372374 # If value not found, leave as is
373375 original_value = match .group (0 ) # Keep the REDACTED marker
374- else :
375- print ("----> original_value: " , original_value )
376376
377377 # Post an alert with the redacted content
378378 input_context .add_alert (self .name , trigger_string = encrypted_value )
0 commit comments