diff --git a/benchmarks/profiling/benchmark_throughput.py b/benchmarks/profiling/benchmark_throughput.py
index 3bbdd7d4267a..67e111f442d6 100644
--- a/benchmarks/profiling/benchmark_throughput.py
+++ b/benchmarks/profiling/benchmark_throughput.py
@@ -272,7 +272,8 @@ def main(args: argparse.Namespace):
         args.tokenizer, trust_remote_code=args.trust_remote_code)
     if args.dataset is None:
         # Synthesize a prompt with the given input length.
-        prompt = "hi" * (args.input_len - 1)
+        prompt = { "prompt_token_ids" : [42] * (args.input_len - 1) } \
+            if args.skip_tokenizer_init else "hi" * (args.input_len - 1)
         requests = [(prompt, args.input_len, args.output_len)
                     for _ in range(args.num_prompts)]
     else:
diff --git a/vllm/engine/output_processor/multi_step.py b/vllm/engine/output_processor/multi_step.py
index c8b282b1a767..99c2baf3f4df 100644
--- a/vllm/engine/output_processor/multi_step.py
+++ b/vllm/engine/output_processor/multi_step.py
@@ -144,7 +144,7 @@ def process_outputs(self,
     def _process_decode_and_stop(self, seq: Sequence,
                                  sampling_params: SamplingParams) -> None:
         new_char_count = 0
-        if sampling_params.detokenize:
+        if sampling_params.detokenize and self.detokenizer:
             new_char_count = self.detokenizer.decode_sequence_inplace(
                 seq, sampling_params)
 