@@ -978,6 +978,10 @@ def predict(
         grounding_source: Optional[
             Union[GroundingSource.WebSearch, GroundingSource.VertexAISearch]
         ] = None,
+        logprobs: Optional[int] = None,
+        presence_penalty: Optional[float] = None,
+        frequency_penalty: Optional[float] = None,
+        logit_bias: Optional[Dict[int, float]] = None,
     ) -> "MultiCandidateTextGenerationResponse":
         """Gets model response for a single prompt.

@@ -990,6 +994,26 @@ def predict(
             stop_sequences: Customized stop sequences to stop the decoding process.
             candidate_count: Number of response candidates to return.
             grounding_source: If specified, grounding feature will be enabled using the grounding source. Default: None.
+            logprobs: Returns the top `logprobs` most likely candidate tokens with their log probabilities
+                at each generation step. The chosen tokens and their log probabilities at each step are
+                always returned; the chosen token may or may not be among the top `logprobs` most likely
+                candidates.
+                The minimum value for `logprobs` is 0, which means only the chosen tokens and their log
+                probabilities are returned. The maximum value is 5.
+            presence_penalty:
+                Positive values penalize tokens that have already appeared in the generated text,
+                increasing the likelihood of generating more diverse topics.
+                Range: [-2.0, 2.0]
+            frequency_penalty:
+                Positive values penalize tokens that repeatedly appear in the generated text,
+                decreasing the likelihood of repeating the same content.
+                Range: [-2.0, 2.0]
+            logit_bias:
+                Mapping from token IDs (integers) to their bias values (floats).
+                The bias values are added to the logits before sampling.
+                A larger positive bias increases the probability of choosing the token;
+                a more negative bias decreases it.
+                Range: [-100.0, 100.0]

         Returns:
             A `MultiCandidateTextGenerationResponse` object that contains the text produced by the model.
@@ -1003,6 +1027,10 @@ def predict(
             stop_sequences=stop_sequences,
             candidate_count=candidate_count,
             grounding_source=grounding_source,
+            logprobs=logprobs,
+            presence_penalty=presence_penalty,
+            frequency_penalty=frequency_penalty,
+            logit_bias=logit_bias,
         )

         prediction_response = self._endpoint.predict(
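For orientation, here is a minimal usage sketch of the new `predict()` parameters. It assumes the usual SDK setup (`vertexai.init()` already called with a project and location); the model name and token ID are illustrative, not prescribed by this change.

```python
from vertexai.language_models import TextGenerationModel

model = TextGenerationModel.from_pretrained("text-bison")  # illustrative model name

response = model.predict(
    "Write a short poem about autumn.",
    logprobs=2,                  # also return the top-2 candidate tokens per step
    presence_penalty=0.6,        # penalize any token that has already appeared
    frequency_penalty=0.3,       # penalize tokens in proportion to their repetition
    logit_bias={25996: -100.0},  # hypothetical token ID; -100.0 effectively bans it
)
print(response.text)
```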
@@ -1027,6 +1055,10 @@ async def predict_async(
         grounding_source: Optional[
             Union[GroundingSource.WebSearch, GroundingSource.VertexAISearch]
         ] = None,
+        logprobs: Optional[int] = None,
+        presence_penalty: Optional[float] = None,
+        frequency_penalty: Optional[float] = None,
+        logit_bias: Optional[Dict[int, float]] = None,
     ) -> "MultiCandidateTextGenerationResponse":
         """Asynchronously gets model response for a single prompt.

@@ -1039,6 +1071,26 @@ async def predict_async(
             stop_sequences: Customized stop sequences to stop the decoding process.
             candidate_count: Number of response candidates to return.
             grounding_source: If specified, grounding feature will be enabled using the grounding source. Default: None.
+            logprobs: Returns the top `logprobs` most likely candidate tokens with their log probabilities
+                at each generation step. The chosen tokens and their log probabilities at each step are
+                always returned; the chosen token may or may not be among the top `logprobs` most likely
+                candidates.
+                The minimum value for `logprobs` is 0, which means only the chosen tokens and their log
+                probabilities are returned. The maximum value is 5.
+            presence_penalty:
+                Positive values penalize tokens that have already appeared in the generated text,
+                increasing the likelihood of generating more diverse topics.
+                Range: [-2.0, 2.0]
+            frequency_penalty:
+                Positive values penalize tokens that repeatedly appear in the generated text,
+                decreasing the likelihood of repeating the same content.
+                Range: [-2.0, 2.0]
+            logit_bias:
+                Mapping from token IDs (integers) to their bias values (floats).
+                The bias values are added to the logits before sampling.
+                A larger positive bias increases the probability of choosing the token;
+                a more negative bias decreases it.
+                Range: [-100.0, 100.0]

         Returns:
             A `MultiCandidateTextGenerationResponse` object that contains the text produced by the model.
@@ -1052,6 +1104,10 @@ async def predict_async(
             stop_sequences=stop_sequences,
             candidate_count=candidate_count,
             grounding_source=grounding_source,
+            logprobs=logprobs,
+            presence_penalty=presence_penalty,
+            frequency_penalty=frequency_penalty,
+            logit_bias=logit_bias,
         )

         prediction_response = await self._endpoint.predict_async(
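The async variant plumbs the same parameters through `predict_async()`; a sketch under the same assumptions:

```python
import asyncio

from vertexai.language_models import TextGenerationModel


async def main() -> None:
    model = TextGenerationModel.from_pretrained("text-bison")  # illustrative model name
    response = await model.predict_async(
        "Summarize the water cycle in two sentences.",
        presence_penalty=0.5,
        frequency_penalty=0.5,
    )
    print(response.text)


asyncio.run(main())
```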
@@ -1072,6 +1128,10 @@ def predict_streaming(
         top_k: Optional[int] = None,
         top_p: Optional[float] = None,
         stop_sequences: Optional[List[str]] = None,
+        logprobs: Optional[int] = None,
+        presence_penalty: Optional[float] = None,
+        frequency_penalty: Optional[float] = None,
+        logit_bias: Optional[Dict[int, float]] = None,
     ) -> Iterator[TextGenerationResponse]:
         """Gets a streaming model response for a single prompt.

@@ -1084,6 +1144,26 @@ def predict_streaming(
             top_k: The number of highest probability vocabulary tokens to keep for top-k-filtering. Range: [1, 40]. Default: 40.
             top_p: The cumulative probability of parameter highest probability vocabulary tokens to keep for nucleus sampling. Range: [0, 1]. Default: 0.95.
             stop_sequences: Customized stop sequences to stop the decoding process.
+            logprobs: Returns the top `logprobs` most likely candidate tokens with their log probabilities
+                at each generation step. The chosen tokens and their log probabilities at each step are
+                always returned; the chosen token may or may not be among the top `logprobs` most likely
+                candidates.
+                The minimum value for `logprobs` is 0, which means only the chosen tokens and their log
+                probabilities are returned. The maximum value is 5.
+            presence_penalty:
+                Positive values penalize tokens that have already appeared in the generated text,
+                increasing the likelihood of generating more diverse topics.
+                Range: [-2.0, 2.0]
+            frequency_penalty:
+                Positive values penalize tokens that repeatedly appear in the generated text,
+                decreasing the likelihood of repeating the same content.
+                Range: [-2.0, 2.0]
+            logit_bias:
+                Mapping from token IDs (integers) to their bias values (floats).
+                The bias values are added to the logits before sampling.
+                A larger positive bias increases the probability of choosing the token;
+                a more negative bias decreases it.
+                Range: [-100.0, 100.0]

         Yields:
             A stream of `TextGenerationResponse` objects that contain partial
@@ -1096,6 +1176,10 @@ def predict_streaming(
            top_k=top_k,
            top_p=top_p,
            stop_sequences=stop_sequences,
+            logprobs=logprobs,
+            presence_penalty=presence_penalty,
+            frequency_penalty=frequency_penalty,
+            logit_bias=logit_bias,
         )

         prediction_service_client = self._endpoint._prediction_client
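For streaming, the parameters apply to the whole request and each yielded chunk is a partial `TextGenerationResponse`; a sketch under the same assumptions:

```python
from vertexai.language_models import TextGenerationModel

model = TextGenerationModel.from_pretrained("text-bison")  # illustrative model name

for chunk in model.predict_streaming(
    "List three common uses of Python.",
    logprobs=5,             # 5 is the documented maximum
    frequency_penalty=1.0,  # strongly discourage repetition across the stream
):
    print(chunk.text, end="")
```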
@@ -1122,6 +1206,10 @@ async def predict_streaming_async(
         top_k: Optional[int] = None,
         top_p: Optional[float] = None,
         stop_sequences: Optional[List[str]] = None,
+        logprobs: Optional[int] = None,
+        presence_penalty: Optional[float] = None,
+        frequency_penalty: Optional[float] = None,
+        logit_bias: Optional[Dict[int, float]] = None,
     ) -> AsyncIterator[TextGenerationResponse]:
         """Asynchronously gets a streaming model response for a single prompt.

@@ -1134,6 +1222,26 @@ async def predict_streaming_async(
             top_k: The number of highest probability vocabulary tokens to keep for top-k-filtering. Range: [1, 40]. Default: 40.
             top_p: The cumulative probability of parameter highest probability vocabulary tokens to keep for nucleus sampling. Range: [0, 1]. Default: 0.95.
             stop_sequences: Customized stop sequences to stop the decoding process.
+            logprobs: Returns the top `logprobs` most likely candidate tokens with their log probabilities
+                at each generation step. The chosen tokens and their log probabilities at each step are
+                always returned; the chosen token may or may not be among the top `logprobs` most likely
+                candidates.
+                The minimum value for `logprobs` is 0, which means only the chosen tokens and their log
+                probabilities are returned. The maximum value is 5.
+            presence_penalty:
+                Positive values penalize tokens that have already appeared in the generated text,
+                increasing the likelihood of generating more diverse topics.
+                Range: [-2.0, 2.0]
+            frequency_penalty:
+                Positive values penalize tokens that repeatedly appear in the generated text,
+                decreasing the likelihood of repeating the same content.
+                Range: [-2.0, 2.0]
+            logit_bias:
+                Mapping from token IDs (integers) to their bias values (floats).
+                The bias values are added to the logits before sampling.
+                A larger positive bias increases the probability of choosing the token;
+                a more negative bias decreases it.
+                Range: [-100.0, 100.0]

         Yields:
             A stream of `TextGenerationResponse` objects that contain partial
@@ -1146,6 +1254,10 @@ async def predict_streaming_async(
            top_k=top_k,
            top_p=top_p,
            stop_sequences=stop_sequences,
+            logprobs=logprobs,
+            presence_penalty=presence_penalty,
+            frequency_penalty=frequency_penalty,
+            logit_bias=logit_bias,
         )

         prediction_service_async_client = self._endpoint._prediction_async_client
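And the async streaming variant differs only in the `async for` iteration:

```python
import asyncio

from vertexai.language_models import TextGenerationModel


async def main() -> None:
    model = TextGenerationModel.from_pretrained("text-bison")  # illustrative model name
    async for chunk in model.predict_streaming_async(
        "List three common uses of Python.",
        presence_penalty=0.4,
    ):
        print(chunk.text, end="")


asyncio.run(main())
```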
@@ -1174,6 +1286,10 @@ def _create_text_generation_prediction_request(
     grounding_source: Optional[
         Union[GroundingSource.WebSearch, GroundingSource.VertexAISearch]
     ] = None,
+    logprobs: Optional[int] = None,
+    presence_penalty: Optional[float] = None,
+    frequency_penalty: Optional[float] = None,
+    logit_bias: Optional[Dict[int, float]] = None,
 ) -> "_PredictionRequest":
     """Prepares the text generation request for a single prompt.

@@ -1186,7 +1302,26 @@ def _create_text_generation_prediction_request(
         stop_sequences: Customized stop sequences to stop the decoding process.
         candidate_count: Number of candidates to return.
         grounding_source: If specified, grounding feature will be enabled using the grounding source. Default: None.
-
+        logprobs: Returns the top `logprobs` most likely candidate tokens with their log probabilities
+            at each generation step. The chosen tokens and their log probabilities at each step are
+            always returned; the chosen token may or may not be among the top `logprobs` most likely
+            candidates.
+            The minimum value for `logprobs` is 0, which means only the chosen tokens and their log
+            probabilities are returned. The maximum value is 5.
+        presence_penalty:
+            Positive values penalize tokens that have already appeared in the generated text,
+            increasing the likelihood of generating more diverse topics.
+            Range: [-2.0, 2.0]
+        frequency_penalty:
+            Positive values penalize tokens that repeatedly appear in the generated text,
+            decreasing the likelihood of repeating the same content.
+            Range: [-2.0, 2.0]
+        logit_bias:
+            Mapping from token IDs (integers) to their bias values (floats).
+            The bias values are added to the logits before sampling.
+            A larger positive bias increases the probability of choosing the token;
+            a more negative bias decreases it.
+            Range: [-100.0, 100.0]

     Returns:
         A `_PredictionRequest` object that contains prediction instance and parameters.
@@ -1221,6 +1356,18 @@ def _create_text_generation_prediction_request(
                 "groundingConfig"
             ] = grounding_source._to_grounding_source_dict()

+    if logprobs is not None:
+        prediction_parameters["logprobs"] = logprobs
+
+    if presence_penalty is not None:
+        prediction_parameters["presencePenalty"] = presence_penalty
+
+    if frequency_penalty is not None:
+        prediction_parameters["frequencyPenalty"] = frequency_penalty
+
+    if logit_bias is not None:
+        prediction_parameters["logitBias"] = logit_bias
+
     return _PredictionRequest(
         instance=instance,
         parameters=prediction_parameters,
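Based on the key names set above, the helper folds the snake_case keyword arguments into camelCase wire keys. Roughly, with illustrative values, the resulting parameters payload would contain:

```python
prediction_parameters = {
    "logprobs": 3,
    "presencePenalty": 0.8,
    "frequencyPenalty": 0.2,
    "logitBias": {42: -100.0},  # hypothetical token ID
}
```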