neon-schedgen.ml (core): New type.

2010-09-01  Ramana Radhakrishnan  <ramana.radhakrishnan@arm.com>

	* config/arm/neon-schedgen.ml (core): New type.
	(allCores): List of supported cores.
	(availability_table): Add supported cores.
	(collate_bypasses): Accept core as a parameter.
	(worst_case_latencies_and_bypasses): Accept core as a
	 parameter.
	(emit_insn_reservations): Accept core as a parameter.
	Use tuneStr and coreStr to get tune attribute and prefix
	for functional units.
	(emit_bypasses): Accept core name and use it.
	(calculate_per_core_availability_table): New.
	(filter_core): New.
	(calculate_core_availability_table): New.
	(main): Use calculate_core_availability_table.
	* config/arm/cortex-a8-neon.md: Update copyright year.
	Regenerated from ml file and merged in.
	(neon_mrrc, neon_mrc): Rename to cortex_a8_neon_mrrc and
	cortex_a8_neon_mrc.

From-SVN: r163737
This commit is contained in:
Ramana Radhakrishnan 2010-09-01 15:28:13 +00:00 committed by Ramana Radhakrishnan
parent f685731072
commit 4600a8d197
3 changed files with 864 additions and 803 deletions

View file

@ -1,3 +1,24 @@
2010-09-01 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
* config/arm/neon-schedgen.ml (core): New type.
(allCores): List of supported cores.
(availability_table): Add supported cores.
(collate_bypasses): Accept core as a parameter.
(worst_case_latencies_and_bypasses): Accept core as a
parameter.
(emit_insn_reservations): Accept core as a parameter.
Use tuneStr and coreStr to get tune attribute and prefix
for functional units.
(emit_bypasses): Accept core name and use it.
(calculate_per_core_availability_table): New.
(filter_core): New.
(calculate_core_availability_table): New.
(main): Use calculate_core_availability_table.
* config/arm/cortex-a8-neon.md: Update copyright year.
Regenerated from ml file and merged in.
(neon_mrrc, neon_mrc): Rename to cortex_a8_neon_mrrc and
cortex_a8_neon_mrc.
2010-09-01 Ian Bolton <ian.bolton@arm.com>
* Makefile.in (tree-switch-conversion.o): Update dependencies.

File diff suppressed because it is too large Load diff

View file

@ -1,7 +1,6 @@
(* Emission of the core of the Cortex-A8 NEON scheduling description.
Copyright (C) 2007, 2010 Free Software Foundation, Inc.
Contributed by CodeSourcery.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
@ -21,7 +20,14 @@
(* This scheduling description generator works as follows.
- Each group of instructions has source and destination requirements
specified. The source requirements may be specified using
specified, along with the list of cores that support it. The table
is then filtered per core and a scheduler description is generated
for each supported core. The generated reservations are prefixed
with the name of the core, and each one is guarded by a check on
the tuning string. Running this generates Neon scheduler
descriptions for all supported cores.
The source requirements may be specified using
Source (the stage at which all source operands not otherwise
described are read), Source_m (the stage at which Rm operands are
read), Source_n (likewise for Rn) and Source_d (likewise for Rd).
@ -83,6 +89,17 @@ type reservation =
| Ls of int
| Fmul_then_fadd | Fmul_then_fadd_2
(* Cores this generator knows how to name.  *)
type core = CortexA8 | CortexA9

(* Cores for which scheduler descriptions are actually emitted; this is
   also the value used as the "supported cores" component of each
   availability_table entry.  *)
let allCores = [CortexA8]

(* Prefix used for the names of the reservations, bypasses and
   functional units emitted for CORE.  *)
let coreStr core =
  match core with
  | CortexA8 -> "cortex_a8"
  | CortexA9 -> "cortex_a9"

(* Value tested against the "tune" attribute in the reservations
   emitted for CORE.  *)
let tuneStr core =
  match core with
  | CortexA8 -> "cortexa8"
  | CortexA9 -> "cortexa9"
(* This table must be kept as short as possible by conflating
entries with the same availability behavior.
@ -90,129 +107,136 @@ type reservation =
Second components: availability requirements, in the order in which
they should appear in the comments in the .md file.
Third components: reservation info
Fourth components: List of supported cores.
*)
let availability_table = [
(* NEON integer ALU instructions. *)
(* vbit vbif vbsl vorr vbic vnot vcls vclz vcnt vadd vand vorr
veor vbic vorn ddd qqq *)
"neon_int_1", [Source n2; Dest n3], ALU;
"neon_int_1", [Source n2; Dest n3], ALU, allCores;
(* vadd vsub qqd vsub ddd qqq *)
"neon_int_2", [Source_m n1; Source_n n2; Dest n3], ALU;
"neon_int_2", [Source_m n1; Source_n n2; Dest n3], ALU, allCores;
(* vsum vneg dd qq vadd vsub qdd *)
"neon_int_3", [Source n1; Dest n3], ALU;
"neon_int_3", [Source n1; Dest n3], ALU, allCores;
(* vabs vceqz vcgez vcgtz vclez vcltz vadh vradh vsbh vrsbh dqq *)
(* vhadd vrhadd vqadd vtst ddd qqq *)
"neon_int_4", [Source n2; Dest n4], ALU;
"neon_int_4", [Source n2; Dest n4], ALU, allCores;
(* vabd qdd vhsub vqsub vabd vceq vcge vcgt vmax vmin vfmx vfmn ddd ddd *)
"neon_int_5", [Source_m n1; Source_n n2; Dest n4], ALU;
"neon_int_5", [Source_m n1; Source_n n2; Dest n4], ALU, allCores;
(* vqneg vqabs dd qq *)
"neon_vqneg_vqabs", [Source n1; Dest n4], ALU;
"neon_vqneg_vqabs", [Source n1; Dest n4], ALU, allCores;
(* vmov vmvn *)
"neon_vmov", [Dest n3], ALU;
"neon_vmov", [Dest n3], ALU, allCores;
(* vaba *)
"neon_vaba", [Source_n n2; Source_m n1; Source_d n3; Dest n6], ALU;
"neon_vaba", [Source_n n2; Source_m n1; Source_d n3; Dest n6], ALU, allCores;
"neon_vaba_qqq",
[Source_n n2; Source_m n1; Source_d n3; Dest_n_after (1, n6)], ALU_2cycle;
[Source_n n2; Source_m n1; Source_d n3; Dest_n_after (1, n6)],
ALU_2cycle, allCores;
(* vsma *)
"neon_vsma", [Source_m n1; Source_d n3; Dest n6], ALU;
"neon_vsma", [Source_m n1; Source_d n3; Dest n6], ALU, allCores;
(* NEON integer multiply instructions. *)
(* vmul, vqdmlh, vqrdmlh *)
(* vmul, vqdmul, qdd 16/8 long 32/16 long *)
"neon_mul_ddd_8_16_qdd_16_8_long_32_16_long", [Source n2; Dest n6], Mul;
"neon_mul_qqq_8_16_32_ddd_32", [Source n2; Dest_n_after (1, n6)], Mul_2cycle;
"neon_mul_ddd_8_16_qdd_16_8_long_32_16_long", [Source n2; Dest n6],
Mul, allCores;
"neon_mul_qqq_8_16_32_ddd_32", [Source n2; Dest_n_after (1, n6)],
Mul_2cycle, allCores;
(* vmul, vqdmul again *)
"neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar",
[Source_n n2; Source_m n1; Dest_n_after (1, n6)], Mul_2cycle;
[Source_n n2; Source_m n1; Dest_n_after (1, n6)], Mul_2cycle, allCores;
(* vmla, vmls *)
"neon_mla_ddd_8_16_qdd_16_8_long_32_16_long",
[Source_n n2; Source_m n2; Source_d n3; Dest n6], Mul;
[Source_n n2; Source_m n2; Source_d n3; Dest n6], Mul, allCores;
"neon_mla_qqq_8_16",
[Source_n n2; Source_m n2; Source_d n3; Dest_n_after (1, n6)], Mul_2cycle;
[Source_n n2; Source_m n2; Source_d n3; Dest_n_after (1, n6)],
Mul_2cycle, allCores;
"neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long",
[Source_n n2; Source_m n1; Source_d n3; Dest_n_after (1, n6)], Mul_2cycle;
[Source_n n2; Source_m n1; Source_d n3; Dest_n_after (1, n6)],
Mul_2cycle, allCores;
"neon_mla_qqq_32_qqd_32_scalar",
[Source_n n2; Source_m n1; Source_d n3; Dest_n_after (3, n6)], Mul_4cycle;
[Source_n n2; Source_m n1; Source_d n3; Dest_n_after (3, n6)],
Mul_4cycle, allCores;
(* vmul, vqdmulh, vqrdmulh *)
(* vmul, vqdmul *)
"neon_mul_ddd_16_scalar_32_16_long_scalar",
[Source_n n2; Source_m n1; Dest n6], Mul;
[Source_n n2; Source_m n1; Dest n6], Mul, allCores;
"neon_mul_qqd_32_scalar",
[Source_n n2; Source_m n1; Dest_n_after (3, n6)], Mul_4cycle;
[Source_n n2; Source_m n1; Dest_n_after (3, n6)], Mul_4cycle, allCores;
(* vmla, vmls *)
(* vmla, vmla, vqdmla, vqdmls *)
"neon_mla_ddd_16_scalar_qdd_32_16_long_scalar",
[Source_n n2; Source_m n1; Source_d n3; Dest n6], Mul;
[Source_n n2; Source_m n1; Source_d n3; Dest n6], Mul, allCores;
(* NEON integer shift instructions. *)
(* vshr/vshl immediate, vshr_narrow, vshl_vmvh, vsli_vsri_ddd *)
"neon_shift_1", [Source n1; Dest n3], Shift;
(* vqshl, vrshr immediate; vqshr, vqmov, vrshr, vqrshr narrow;
"neon_shift_1", [Source n1; Dest n3], Shift, allCores;
(* vqshl, vrshr immediate; vqshr, vqmov, vrshr, vqrshr narrow;
vqshl_vrshl_vqrshl_ddd *)
"neon_shift_2", [Source n1; Dest n4], Shift;
"neon_shift_2", [Source n1; Dest n4], Shift, allCores;
(* vsli, vsri and vshl for qqq *)
"neon_shift_3", [Source n1; Dest_n_after (1, n3)], Shift_2cycle;
"neon_vshl_ddd", [Source n1; Dest n1], Shift;
"neon_shift_3", [Source n1; Dest_n_after (1, n3)], Shift_2cycle, allCores;
"neon_vshl_ddd", [Source n1; Dest n1], Shift, allCores;
"neon_vqshl_vrshl_vqrshl_qqq", [Source n1; Dest_n_after (1, n4)],
Shift_2cycle;
"neon_vsra_vrsra", [Source_m n1; Source_d n3; Dest n6], Shift;
Shift_2cycle, allCores;
"neon_vsra_vrsra", [Source_m n1; Source_d n3; Dest n6], Shift, allCores;
(* NEON floating-point instructions. *)
(* vadd, vsub, vabd, vmul, vceq, vcge, vcgt, vcage, vcagt, vmax, vmin *)
(* vabs, vneg, vceqz, vcgez, vcgtz, vclez, vcltz, vrecpe, vrsqrte, vcvt *)
"neon_fp_vadd_ddd_vabs_dd", [Source n2; Dest n5], Fadd;
"neon_fp_vadd_ddd_vabs_dd", [Source n2; Dest n5], Fadd, allCores;
"neon_fp_vadd_qqq_vabs_qq", [Source n2; Dest_n_after (1, n5)],
Fadd_2cycle;
Fadd_2cycle, allCores;
(* vsum, vfmx, vfmn *)
"neon_fp_vsum", [Source n1; Dest n5], Fadd;
"neon_fp_vmul_ddd", [Source_n n2; Source_m n1; Dest n5], Fmul;
"neon_fp_vsum", [Source n1; Dest n5], Fadd, allCores;
"neon_fp_vmul_ddd", [Source_n n2; Source_m n1; Dest n5], Fmul, allCores;
"neon_fp_vmul_qqd", [Source_n n2; Source_m n1; Dest_n_after (1, n5)],
Fmul_2cycle;
Fmul_2cycle, allCores;
(* vmla, vmls *)
"neon_fp_vmla_ddd",
[Source_n n2; Source_m n2; Source_d n3; Dest n9], Fmul_then_fadd;
[Source_n n2; Source_m n2; Source_d n3; Dest n9], Fmul_then_fadd, allCores;
"neon_fp_vmla_qqq",
[Source_n n2; Source_m n2; Source_d n3; Dest_n_after (1, n9)],
Fmul_then_fadd_2;
Fmul_then_fadd_2, allCores;
"neon_fp_vmla_ddd_scalar",
[Source_n n2; Source_m n1; Source_d n3; Dest n9], Fmul_then_fadd;
[Source_n n2; Source_m n1; Source_d n3; Dest n9], Fmul_then_fadd, allCores;
"neon_fp_vmla_qqq_scalar",
[Source_n n2; Source_m n1; Source_d n3; Dest_n_after (1, n9)],
Fmul_then_fadd_2;
"neon_fp_vrecps_vrsqrts_ddd", [Source n2; Dest n9], Fmul_then_fadd;
Fmul_then_fadd_2, allCores;
"neon_fp_vrecps_vrsqrts_ddd", [Source n2; Dest n9], Fmul_then_fadd, allCores;
"neon_fp_vrecps_vrsqrts_qqq", [Source n2; Dest_n_after (1, n9)],
Fmul_then_fadd_2;
Fmul_then_fadd_2, allCores;
(* NEON byte permute instructions. *)
(* vmov; vtrn and vswp for dd; vzip for dd; vuzp for dd; vrev; vext for dd *)
"neon_bp_simple", [Source n1; Dest n2], Permute 1;
(* vswp for qq; vext for qqq; vtbl with {Dn} or {Dn, Dn1};
"neon_bp_simple", [Source n1; Dest n2], Permute 1, allCores;
(* vswp for qq; vext for qqq; vtbl with {Dn} or {Dn, Dn1};
similarly for vtbx *)
"neon_bp_2cycle", [Source n1; Dest_n_after (1, n2)], Permute 2;
"neon_bp_2cycle", [Source n1; Dest_n_after (1, n2)], Permute 2, allCores;
(* all the rest *)
"neon_bp_3cycle", [Source n1; Dest_n_after (2, n2)], Permute 3;
"neon_bp_3cycle", [Source n1; Dest_n_after (2, n2)], Permute 3, allCores;
(* NEON load/store instructions. *)
"neon_ldr", [Dest n1], Ls 1;
"neon_str", [Source n1], Ls 1;
"neon_vld1_1_2_regs", [Dest_n_after (1, n1)], Ls 2;
"neon_vld1_3_4_regs", [Dest_n_after (2, n1)], Ls 3;
"neon_vld2_2_regs_vld1_vld2_all_lanes", [Dest_n_after (1, n2)], Ls 2;
"neon_vld2_4_regs", [Dest_n_after (2, n2)], Ls 3;
"neon_vld3_vld4", [Dest_n_after (3, n2)], Ls 4;
"neon_vst1_1_2_regs_vst2_2_regs", [Source n1], Ls 2;
"neon_vst1_3_4_regs", [Source n1], Ls 3;
"neon_vst2_4_regs_vst3_vst4", [Source n1], Ls 4;
"neon_vst3_vst4", [Source n1], Ls 4;
"neon_vld1_vld2_lane", [Source n1; Dest_n_after (2, n2)], Ls 3;
"neon_vld3_vld4_lane", [Source n1; Dest_n_after (4, n2)], Ls 5;
"neon_vst1_vst2_lane", [Source n1], Ls 2;
"neon_vst3_vst4_lane", [Source n1], Ls 3;
"neon_vld3_vld4_all_lanes", [Dest_n_after (1, n2)], Ls 3;
"neon_ldr", [Dest n1], Ls 1, allCores;
"neon_str", [Source n1], Ls 1, allCores;
"neon_vld1_1_2_regs", [Dest_n_after (1, n1)], Ls 2, allCores;
"neon_vld1_3_4_regs", [Dest_n_after (2, n1)], Ls 3, allCores;
"neon_vld2_2_regs_vld1_vld2_all_lanes", [Dest_n_after (1, n2)], Ls 2, allCores;
"neon_vld2_4_regs", [Dest_n_after (2, n2)], Ls 3, allCores;
"neon_vld3_vld4", [Dest_n_after (3, n2)], Ls 4, allCores;
"neon_vst1_1_2_regs_vst2_2_regs", [Source n1], Ls 2, allCores;
"neon_vst1_3_4_regs", [Source n1], Ls 3, allCores;
"neon_vst2_4_regs_vst3_vst4", [Source n1], Ls 4, allCores;
"neon_vst3_vst4", [Source n1], Ls 4, allCores;
"neon_vld1_vld2_lane", [Source n1; Dest_n_after (2, n2)], Ls 3, allCores;
"neon_vld3_vld4_lane", [Source n1; Dest_n_after (4, n2)], Ls 5, allCores;
"neon_vst1_vst2_lane", [Source n1], Ls 2, allCores;
"neon_vst3_vst4_lane", [Source n1], Ls 3, allCores;
"neon_vld3_vld4_all_lanes", [Dest_n_after (1, n2)], Ls 3, allCores;
(* NEON register transfer instructions. *)
"neon_mcr", [Dest n2], Permute 1;
"neon_mcr_2_mcrr", [Dest n2], Permute 2;
"neon_mcr", [Dest n2], Permute 1, allCores;
"neon_mcr_2_mcrr", [Dest n2], Permute 2, allCores;
(* MRC instructions are in the .tpl file. *)
]
@ -221,7 +245,7 @@ let availability_table = [
required. (It is also possible that an entry in the table has no
source requirements.) *)
let calculate_sources =
List.map (fun (name, avail, res) ->
List.map (fun (name, avail, res, cores) ->
let earliest_stage =
List.fold_left
(fun cur -> fun info ->
@ -331,7 +355,7 @@ let pick_latency largest worst guards =
of one bypass from this producer to any particular consumer listed
in LATENCIES.) Use a hash table to collate bypasses with the
same latency and guard. *)
let collate_bypasses (producer_name, _, _, _) largest latencies =
let collate_bypasses (producer_name, _, _, _) largest latencies core =
let ht = Hashtbl.create 42 in
let keys = ref [] in
List.iter (
@ -350,7 +374,7 @@ let collate_bypasses (producer_name, _, _, _) largest latencies =
(if (try ignore (Hashtbl.find ht (guard, latency)); false
with Not_found -> true) then
keys := (guard, latency) :: !keys);
Hashtbl.add ht (guard, latency) consumer
Hashtbl.add ht (guard, latency) ((coreStr core) ^ "_" ^ consumer)
end
) latencies;
(* The hash table now has bypasses collated so that ones with the
@ -372,7 +396,7 @@ let collate_bypasses (producer_name, _, _, _) largest latencies =
the output in such a way that all bypasses with the same producer
and latency are together, and so that bypasses with the worst-case
latency are ignored. *)
let worst_case_latencies_and_bypasses =
let worst_case_latencies_and_bypasses core =
let rec f (worst_acc, bypasses_acc) prev xs =
match xs with
[] -> (worst_acc, bypasses_acc)
@ -400,7 +424,7 @@ let worst_case_latencies_and_bypasses =
(* Having got the largest latency, collect all bypasses for
this producer and filter out those with that larger
latency. Record the others for later emission. *)
let bypasses = collate_bypasses producer largest latencies in
let bypasses = collate_bypasses producer largest latencies core in
(* Go on to process remaining producers, having noted
the result for this one. *)
f ((producer_name, producer_avail, largest,
@ -444,14 +468,18 @@ let write_comment producer avail =
in
f avail 0
(* Emit a define_insn_reservation for each producer. The latency
written in will be its worst-case latency. *)
let emit_insn_reservations =
List.iter (
let emit_insn_reservations core =
let corestring = coreStr core in
let tunestring = tuneStr core
in List.iter (
fun (producer, avail, latency, reservation) ->
write_comment producer avail;
Printf.printf "(define_insn_reservation \"%s\" %d\n" producer latency;
Printf.printf " (and (eq_attr \"tune\" \"cortexa8\")\n";
Printf.printf "(define_insn_reservation \"%s_%s\" %d\n"
corestring producer latency;
Printf.printf " (and (eq_attr \"tune\" \"%s\")\n" tunestring;
Printf.printf " (eq_attr \"neon_type\" \"%s\"))\n" producer;
let str =
match reservation with
@ -467,7 +495,7 @@ let emit_insn_reservations =
| Fmul_then_fadd -> "fmul_then_fadd"
| Fmul_then_fadd_2 -> "fmul_then_fadd_2"
in
Printf.printf " \"cortex_a8_neon_%s\")\n\n" str
Printf.printf " \"%s_neon_%s\")\n\n" corestring str
)
(* Given a guard description, return the name of the C function to
@ -480,10 +508,12 @@ let guard_fn g =
| Guard_none -> assert false
(* Emit a define_bypass for each bypass. *)
let emit_bypasses =
let emit_bypasses core =
List.iter (
fun (producer, consumers, latency, guard) ->
Printf.printf "(define_bypass %d \"%s\"\n" latency producer;
Printf.printf "(define_bypass %d \"%s_%s\"\n"
latency (coreStr core) producer;
if guard = Guard_none then
Printf.printf " \"%s\")\n\n" consumers
else
@ -493,11 +523,21 @@ let emit_bypasses =
end
)
(* Emit the scheduler description for CORE from AVAILABILITY_TABLE,
   which is expected to hold only the entries relevant to that core.
   The insn reservations are printed first, then a separator comment,
   then the bypasses.  *)
let calculate_per_core_availability_table core availability_table =
  let reservations, bypass_list =
    worst_case_latencies_and_bypasses core
      (calculate_sources availability_table)
  in
  (* The worst-case list is reversed before the reservations are
     emitted.  *)
  emit_insn_reservations core (List.rev reservations);
  Printf.printf ";; Exceptions to the default latencies.\n\n";
  emit_bypasses core bypass_list
(* Keep only the entries of AVAILABILITY_TABLE whose list of supported
   cores (the fourth component) contains CORE, then emit the scheduler
   description for CORE from what remains.  *)
let calculate_core_availability_table core availability_table =
  let supports_core (_, _, _, supported) = List.mem core supported in
  calculate_per_core_availability_table core
    (List.filter supports_core availability_table)
(* Program entry point.  Emit the insn reservations and bypasses for
   every core in allCores, one core after another, in list order.

   The per-core generator is run only for what it prints, so List.iter
   is used; List.map would merely build a list of units that nothing
   reads.  The earlier single-core body, which called the emitters
   without a core argument, no longer matches their signatures and is
   not kept.  *)
let main =
  List.iter
    (fun core -> calculate_core_availability_table core availability_table)
    allCores