Skip to content

Commit 71d1fe5

Browse files
committed
Resolving comments
1 parent 3f3a511 commit 71d1fe5

File tree

4 files changed

+3835
-2869
lines changed

4 files changed

+3835
-2869
lines changed

mssql_python/connection.py

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -56,18 +56,35 @@
5656

5757
def _validate_encoding(encoding: str) -> bool:
5858
"""
59-
Cached encoding validation using codecs.lookup().
60-
59+
Validate encoding name for security and correctness.
60+
61+
This function performs two-layer validation:
62+
1. Security check: Ensures only safe characters are in the encoding name
63+
2. Codec check: Verifies it's a valid Python codec
64+
6165
Args:
6266
encoding (str): The encoding name to validate.
6367
6468
Returns:
65-
bool: True if encoding is valid, False otherwise.
69+
bool: True if encoding is valid and safe, False otherwise.
6670
6771
Note:
68-
Uses LRU cache to avoid repeated expensive codecs.lookup() calls.
69-
Cache size is limited to 128 entries which should cover most use cases.
72+
Rejects encodings with:
73+
- Empty or too long names (>100 chars)
74+
- Suspicious characters (only alphanumeric, hyphen, underscore, dot allowed)
75+
- Invalid Python codecs
7076
"""
77+
# Security validation: Check length and characters
78+
if not encoding or len(encoding) > 100:
79+
return False
80+
81+
# Only allow safe characters in encoding names
82+
# Valid codec names contain: letters, numbers, hyphens, underscores, dots
83+
for char in encoding:
84+
if not (char.isalnum() or char in ('-', '_', '.')):
85+
return False
86+
87+
# Verify it's a valid Python codec
7188
try:
7289
codecs.lookup(encoding)
7390
return True

mssql_python/pybind/ddbc_bindings.cpp

Lines changed: 16 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -181,84 +181,39 @@ SQLTablesFunc SQLTables_ptr = nullptr;
181181
SQLDescribeParamFunc SQLDescribeParam_ptr = nullptr;
182182

183183

184-
// Encoding function with fallback strategy
185-
static py::bytes EncodingString(const std::string& text, const std::string& encoding, const std::string& errors = "strict") {
184+
// Encoding String
185+
static py::bytes EncodingString(const std::string& text,
186+
const std::string& encoding,
187+
const std::string& errors = "strict") {
186188
try {
187189
py::gil_scoped_acquire gil;
188-
189-
// Create unicode string from input text
190190
py::str unicode_str = py::str(text);
191191

192-
// Encoding strategy: try the specified encoding first,
193-
// but fallback to latin-1 for Western European characters if UTF-8 fails
194-
if (encoding == "utf-8" && errors == "strict") {
195-
try {
196-
// Try UTF-8 first
197-
py::bytes encoded = unicode_str.attr("encode")(encoding, "strict");
198-
return encoded;
199-
} catch (const py::error_already_set&) {
200-
// UTF-8 failed, try latin-1 for Western European characters
201-
try {
202-
py::bytes encoded = unicode_str.attr("encode")("latin-1", "strict");
203-
LOG("EncodingString: UTF-8 failed, successfully encoded with latin-1 fallback for {} characters", text.length());
204-
return encoded;
205-
} catch (const py::error_already_set&) {
206-
// Both failed, use original approach with error handling
207-
py::bytes encoded = unicode_str.attr("encode")(encoding, errors);
208-
return encoded;
209-
}
210-
}
211-
} else {
212-
// Use specified encoding directly for non-UTF-8 or non-strict cases
213-
py::bytes encoded = unicode_str.attr("encode")(encoding, errors);
214-
return encoded;
215-
}
192+
// Direct encoding - let Python handle errors strictly
193+
py::bytes encoded = unicode_str.attr("encode")(encoding, errors);
194+
return encoded;
216195

217196
} catch (const py::error_already_set& e) {
218-
// Re-raise Python exceptions as C++ exceptions
197+
// Re-raise Python exceptions (UnicodeEncodeError, etc.)
219198
throw std::runtime_error("Encoding failed: " + std::string(e.what()));
220-
} catch (const std::exception& e) {
221-
throw std::runtime_error("Encoding error: " + std::string(e.what()));
222199
}
223200
}
224201

225-
static py::str DecodingString(const char* data, size_t length, const std::string& encoding, const std::string& errors = "strict") {
202+
// Decoding String
203+
static py::str DecodingString(const char* data, size_t length,
204+
const std::string& encoding,
205+
const std::string& errors = "strict") {
226206
try {
227207
py::gil_scoped_acquire gil;
228-
229-
// Create bytes object from input data
230208
py::bytes byte_data = py::bytes(std::string(data, length));
231209

232-
// Decoding strategy: try the specified encoding first,
233-
// but fallback to latin-1 for Western European characters if UTF-8 fails
234-
if (encoding == "utf-8" && errors == "strict") {
235-
try {
236-
// Try UTF-8 first
237-
py::str decoded = byte_data.attr("decode")(encoding, "strict");
238-
return decoded;
239-
} catch (const py::error_already_set&) {
240-
// UTF-8 failed, try latin-1 for Western European characters
241-
try {
242-
py::str decoded = byte_data.attr("decode")("latin-1", "strict");
243-
LOG("DecodingString: UTF-8 failed, successfully decoded with latin-1 fallback for {} bytes", length);
244-
return decoded;
245-
} catch (const py::error_already_set&) {
246-
// Both failed, use original approach with error handling
247-
py::str decoded = byte_data.attr("decode")(encoding, errors);
248-
return decoded;
249-
}
250-
}
251-
} else {
252-
// Use specified encoding directly for non-UTF-8 or non-strict cases
253-
py::str decoded = byte_data.attr("decode")(encoding, errors);
254-
return decoded;
255-
}
210+
// Direct decoding - let Python handle errors strictly
211+
py::str decoded = byte_data.attr("decode")(encoding, errors);
212+
return decoded;
256213

257214
} catch (const py::error_already_set& e) {
258-
// Re-raise Python exceptions as C++ exceptions
215+
// Re-raise Python exceptions (UnicodeDecodeError, etc.)
259216
throw std::runtime_error("Decoding failed: " + std::string(e.what()));
260-
} catch (const std::exception& e) {
261-
throw std::runtime_error("Decoding error: " + std::string(e.what()));
262217
}
263218
}
264219

0 commit comments

Comments
 (0)