Fix security issue: Unsafe Pickle Deserialization Enabling Remote Code Execution (CWE-502, ML08)

This commit is contained in:
pensarapp[bot] 2025-06-09 16:42:37 +00:00 committed by GitHub
parent ecbabbd261
commit 764c2baa3d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -176,42 +176,42 @@ class DataPoint(BaseModel):
""" """
return self.model_validate_json(json_str) return self.model_validate_json(json_str)
# Pickle Serialization # Pickle Serialization (safe patch: use JSON-under-bytes, not pickle)
def to_pickle(self) -> bytes:
    """
    Serialize the DataPoint instance to bytes for persistence or transmission.

    Despite the method name, no pickling takes place: the instance is rendered
    as JSON via `to_json` and the resulting text is encoded as UTF-8. The name
    is kept so existing callers keep working.

    Returns:
    --------

        - bytes: The UTF-8 encoded JSON representation of the DataPoint instance.
    """
    # JSON rather than pickle, so the matching loader never has to unpickle
    # (and therefore never executes) attacker-controlled data.
    return self.to_json().encode("utf-8")
@classmethod
def from_pickle(cls, pickled_data: bytes) -> "DataPoint":
    """
    Deserialize a DataPoint instance from a serialized byte stream.

    Despite the method name, the payload is never unpickled. It is expected
    to be UTF-8 encoded JSON, which is decoded to text and then handed to
    `from_json`.

    Parameters:
    -----------

        - pickled_data (bytes): UTF-8 encoded JSON bytes describing a DataPoint
          instance. The parameter name is kept for backward compatibility.

    Returns:
    --------

        - 'DataPoint': A new DataPoint instance created from the serialized data.

    Raises:
    -------

        - UnicodeDecodeError: If `pickled_data` is not valid UTF-8. Binary
          pickle payloads typically fail here.
    """
    # Deliberately NOT pickle.loads: unpickling untrusted bytes can execute
    # arbitrary code (CWE-502). Only plain JSON text is accepted.
    json_str = pickled_data.decode("utf-8")
    return cls.from_json(json_str)
def to_dict(self, **kwargs) -> Dict[str, Any]: def to_dict(self, **kwargs) -> Dict[str, Any]:
""" """
@ -252,4 +252,4 @@ class DataPoint(BaseModel):
- 'DataPoint': A new DataPoint instance constructed from the provided dictionary - 'DataPoint': A new DataPoint instance constructed from the provided dictionary
data. data.
""" """
return cls.model_validate(data) return cls.model_validate(data)