Experiments

ID	Engine	Model	Prompt	Evaluations	Pass@1	Compile@1
190	mistral	codestral-mamba-latest	instruct	280	0.57	0.64
76	mlx	ipetrukha/CodeQwen1.5-7B-4bit	completion	200	0.42	0.61
35	mlx	ipetrukha/CodeQwen1.5-7B-Chat-4bit	default	200	0.43	0.64
36	mlx	ipetrukha/Nxcode-CQ-7B-orpo-4bit	default	200	0.43	0.64
73	mlx	mlx-community/CodeLlama-7b-Instruct-hf-4bit-MLX	instruct	200	0.27	0.30
74	mlx	mlx-community/CodeLlama-13b-Instruct-hf-4bit-MLX	instruct	200	0.52	0.65
75	mlx	mlx-community/CodeLlama-34b-Instruct-hf-4bit	instruct	200	0.55	0.69
37	mlx	mlx-community/Codestral-22B-v0.1-4bit	instruct	200	0.81	0.84
9	mlx	mlx-community/codegemma-2b-4bit	completion	207	0.17	0.28
70	mlx	mlx-community/codegemma-7b-4bit	completion	200	0.26	0.48
10	mlx	mlx-community/codegemma-7b-it-4bit	default	200	0.41	0.48
20	mlx	mlx-community/deepseek-coder-1.3b-instruct-mlx	default	200	0.12	0.17
21	mlx	mlx-community/deepseek-coder-6.7b-instruct-hf-4bit-mlx	default	200	0.23	0.24
33	mlx	mlx-community/deepseek-coder-33b-instruct-hf-4bit-mlx	default	200	0.29	0.49
14	mlx	mlx-community/granite-3b-code-instruct-4bit	default	200	0.15	0.21
15	mlx	mlx-community/granite-8b-code-instruct-4bit	default	200	0.27	0.37
16	mlx	mlx-community/granite-20b-code-instruct-4bit	default	200	0.14	0.18
17	mlx	mlx-community/granite-34b-code-instruct-4bit	default	200	0.20	0.34
18	mlx	mlx-community/stable-code-3b-4bit	completion	200	0.11	0.14
19	mlx	mlx-community/stable-code-instruct-3b-4bit	default	200	0.17	0.20
11	mlx	mlx-community/starcoder2-3b-4bit	completion	200	0.26	0.38
12	mlx	mlx-community/starcoder2-7b-4bit	completion	200	0.40	0.55
13	mlx	mlx-community/starcoder2-15b-4bit	completion	200	0.36	0.47
69	openai	gpt-3.5-turbo	default	560	0.81	0.86
66	openai	gpt-4	default	560	0.82	0.90
67	openai	gpt-4-turbo	default	560	0.87	0.93
68	openai	gpt-4o	default	560	0.89	0.92
65	openai	gpt-4o-mini	default	560	0.86	0.90
185	transformers	01-ai/Yi-Coder-1.5B	completion	280	0.01	0.02
186	transformers	01-ai/Yi-Coder-1.5B-Chat	default	280	0.05	0.12
212	transformers	01-ai/Yi-Coder-1.5B-Chat	instruct	280	0.04	0.09
188	transformers	01-ai/Yi-Coder-9B	completion	280	0.17	0.24
187	transformers	01-ai/Yi-Coder-9B-Chat	default	280	0.33	0.43
213	transformers	01-ai/Yi-Coder-9B-Chat	instruct	280	0.37	0.51
112	transformers	NTQAI/Nxcode-CQ-7B-orpo	default	280	0.40	0.60
97	transformers	Qwen/CodeQwen1.5-7B	completion	280	0.41	0.50
98	transformers	Qwen/CodeQwen1.5-7B-Chat	default	280	0.38	0.60
194	transformers	Qwen/Qwen2.5-Coder-1.5B	completion	280	0.11	0.17
195	transformers	Qwen/Qwen2.5-Coder-1.5B-Instruct	default	280	0.15	0.23
193	transformers	Qwen/Qwen2.5-Coder-7B	completion	280	0.29	0.43
192	transformers	Qwen/Qwen2.5-Coder-7B-Instruct	default	280	0.43	0.53
214	transformers	Qwen/Qwen2.5-Coder-7B-Instruct	instruct	280	0.44	0.55
163	transformers	THUDM/codegeex2-6b	completion	280	0.04	0.05
111	transformers	THUDM/codegeex4-all-9b	instruct	280	0.53	0.69
165	transformers	WisdomShell/CodeShell-7B	completion	280	0.12	0.17
167	transformers	WisdomShell/CodeShell-7B-Chat	instruct	280	0.07	0.09
92	transformers	bigcode/starcoder2-3b	completion	280	0.22	0.34
93	transformers	bigcode/starcoder2-7b	completion	280	0.30	0.41
94	transformers	bigcode/starcoder2-15b	completion	280	0.35	0.42
169	transformers	bigcode/starcoder2-15b	instruct	280	0.46	0.52
197	transformers	bigcode/starcoder2-15b-instruct-v0.1	instruct	280	0.53	0.58
216	transformers	deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct	default	280	0.69	0.78
54	transformers	deepseek-ai/deepseek-coder-1.3b-instruct	default	280	0.10	0.15
55	transformers	deepseek-ai/deepseek-coder-6.7b-instruct	default	280	0.17	0.25
56	transformers	deepseek-ai/deepseek-coder-33b-instruct	default	280	0.32	0.44
25	transformers	google/codegemma-1.1-2b	completion	280	0.00	0.01
181	transformers	google/codegemma-1.1-2b	completion	280	0.01	0.02
182	transformers	google/codegemma-1.1-2b	instruct	280	0.00	0.01
26	transformers	google/codegemma-1.1-7b-it	default	280	0.36	0.45
22	transformers	google/codegemma-2b	completion	280	0.17	0.28
23	transformers	google/codegemma-7b	completion	280	0.25	0.41
24	transformers	google/codegemma-7b-it	default	280	0.30	0.40
60	transformers	ibm-granite/granite-3b-code-instruct	default	280	0.11	0.17
61	transformers	ibm-granite/granite-8b-code-instruct	default	280	0.24	0.33
90	transformers	ibm-granite/granite-20b-code-instruct	default	280	0.16	0.21
91	transformers	ibm-granite/granite-34b-code-instruct	default	280	0.31	0.40
161	transformers	m-a-p/OpenCodeInterpreter-DS-6.7B	completion	280	0.20	0.23
159	transformers	m-a-p/OpenCodeInterpreter-DS-33B	instruct	280	0.27	0.36
7	transformers	meta-llama/CodeLlama-7b-Instruct-hf	default	280	0.24	0.29
173	transformers	meta-llama/CodeLlama-7b-Instruct-hf	instruct	280	0.28	0.41
174	transformers	meta-llama/CodeLlama-7b-Instruct-hf	default	280	0.18	0.26
175	transformers	meta-llama/CodeLlama-7b-Instruct-hf	completion	280	0.26	0.41
176	transformers	meta-llama/CodeLlama-7b-Instruct-hf	instruct	280	0.28	0.39
145	transformers	meta-llama/CodeLlama-7b-Python-hf	completion	280	0.25	0.34
8	transformers	meta-llama/CodeLlama-13b-Instruct-hf	default	280	0.24	0.34
171	transformers	meta-llama/CodeLlama-13b-Instruct-hf	instruct	280	0.44	0.60
177	transformers	meta-llama/CodeLlama-13b-Instruct-hf	default	280	0.24	0.34
178	transformers	meta-llama/CodeLlama-13b-Instruct-hf	completion	280	0.41	0.50
179	transformers	meta-llama/CodeLlama-13b-Instruct-hf	instruct	280	0.44	0.57
147	transformers	meta-llama/CodeLlama-13b-Python-hf	instruct	280	0.46	0.61
50	transformers	meta-llama/CodeLlama-34b-Instruct-hf	default	280	0.27	0.39
172	transformers	meta-llama/CodeLlama-34b-Instruct-hf	instruct	280	0.41	0.62
89	transformers	meta-llama/CodeLlama-70b-Instruct-hf	instruct	280	0.51	0.66
99	transformers	mistralai/Codestral-22B-v0.1	default	280	0.69	0.74
215	transformers	mistralai/Codestral-22B-v0.1	instruct	280	0.77	0.82
95	transformers	stabilityai/stable-code-3b	completion	280	0.09	0.14
96	transformers	stabilityai/stable-code-instruct-3b	default	280	0.10	0.14
170	transformers	stabilityai/stable-code-instruct-3b	instruct	280	0.11	0.12
222	vllm	deepseek-ai/DeepSeek-Coder-V2-Instruct	default	280	0.82	0.84
221	vllm	google/codegemma-1.1-2b	instruct	100	0.00	0.00
223	vllm	ibm-granite/granite-3b-code-instruct	default	30	0.43	0.47
218	vllm	stabilityai/stable-code-3b	instruct	280	0.12	0.15