(*
 * Copyright (c) 2001 Stefan Kral
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 *)

open List
open Util
open P4Basics
open StateMonad

(* In this module, SIMD registers are reallocated using a LRU heuristic.
 * All integer instructions remain unchanged. Also, SIMD register-spills
 * are not touched. *)

(****************************************************************************)

let fetchSubstsM   = fetchStateM >>= fun s -> unitM (get1of3 s)
let fetchFreeRegsM = fetchStateM >>= fun s -> unitM (get2of3 s)
let fetchInstrsM   = fetchStateM >>= fun s -> unitM (get3of3 s)

let storeSubstsM x   = fetchStateM >>= fun s -> storeStateM (repl1of3 x s)
let storeFreeRegsM x = fetchStateM >>= fun s -> storeStateM (repl2of3 x s)
let storeInstrsM x   = fetchStateM >>= fun s -> storeStateM (repl3of3 x s)

(****************************************************************************)

let nextReader reg annotatedinstrs = 
  let rec loop before_rev = function
    | [] -> None
    | ((_,_,instr) as x)::xs ->
	if p4rinstrReadsP4rmmxreg reg instr then
	  Some(before_rev,x,xs)
	else if p4rinstrWritesP4rmmxreg reg instr then
	  None
	else loop (x::before_rev) xs
  in loop [] annotatedinstrs

let availableReg reg instrs = optionIsNone (nextReader reg instrs)

let addInstrM instr = fetchInstrsM >>= consM instr >>= storeInstrsM

let freeRegM reg = fetchFreeRegsM >>= fun regs -> storeFreeRegsM (regs @ [reg])

let getSubstM reg = fetchSubstsM >>= fun m -> unitM (RSimdRegMap.find reg m)

let filterSubstM reg = 
  fetchSubstsM >>= fun map -> storeSubstsM (RSimdRegMap.remove reg map)

let addSubstM reg reg' = 
  fetchSubstsM >>= fun map -> storeSubstsM (RSimdRegMap.add reg reg' map)

let allocRegM x = 
  fetchFreeRegsM >>= function
    | []     -> failwith "allocRegM: cannot allocate register!"
    | x'::xs -> addSubstM x x' >> storeFreeRegsM xs >> unitM x'

let regMaybeDiesM reg reg' instrs = match nextReader reg instrs with
  | None   -> freeRegM reg' >> filterSubstM reg
  | Some _ -> unitM ()

(****************************************************************************)

let substRegsInP4rinstr map = function
  | P4R_SimdSpill(s,d) 		      -> P4R_SimdSpill(map s,d)
  | P4R_SimdLoad(size,addr,d)	      -> P4R_SimdLoad(size,addr,map d)
  | P4R_SimdStore(s,size,addr)	      -> P4R_SimdStore(map s,size,addr)
  | P4R_SimdLoad1(addr,pos,d)	      -> P4R_SimdLoad1(addr,pos,map d)
  | P4R_SimdStore1(pos,s,addr)	      -> P4R_SimdStore1(pos,map s,addr)
  | P4R_SimdBinOp(op,s,sd) 	      -> P4R_SimdBinOp(op,map s, map sd)
  | P4R_SimdBinOpMem(op,memop,sd)     -> P4R_SimdBinOpMem(op,memop,map sd)
  | P4R_SimdUnaryOp(op,sd) 	      -> P4R_SimdUnaryOp(op,map sd)
  | P4R_SimdCpyUnaryOp(op,s,d) 	      -> P4R_SimdCpyUnaryOp(op,map s,map d)
  | P4R_SimdCpyUnaryOpMem(op,memop,d) -> P4R_SimdCpyUnaryOpMem(op,memop,map d)
  | x -> x

let swap2regs r1 r2 (t,cplen,instr) =
  let map x = if x=r1 then r2 else if x=r2 then r1 else x in
    (t,cplen,substRegsInP4rinstr map instr)

(* checks that the consumer of s and the consumer of d also writes s resp. d *)
let hasShorterCPLen s d instrs = match nextReader s instrs with
  | Some(_,(_,cp_s,s_instr),_) when p4rinstrWritesP4rmmxreg s s_instr ->
      (match nextReader d instrs with
	 | Some(_,(_,cp_d,d_instr),_) when p4rinstrWritesP4rmmxreg d d_instr ->
	     cp_d > cp_s + 2
	 | _ -> false)
  | _ -> false

(****************************************************************************)

(* requires stackcell-indices to be unique *)
let rec renameRegistersM = function
  | [] -> fetchInstrsM >>= fun instrs' -> unitM (List.rev instrs')
  | x::xs -> renameRegisters1M xs x

and renameRegisters1M instrs ((_,_,instr) as triple) = match instr with
  | P4R_IntLoadEA _
  | P4R_IntLoadMem _
  | P4R_IntStoreMem _
  | P4R_IntUnaryOp _
  | P4R_IntUnaryOpMem _
  | P4R_IntCpyUnaryOp _
  | P4R_IntBinOp _ 
  | P4R_IntBinOpMem _ 
  | P4R_Label _
  | P4R_Jump _
  | P4R_CondBranch _
  | P4R_Ret 
  | P4R_SimdPromiseCellSize _
  | P4R_SimdLoadStoreBarrier ->
      addInstrM instr >> 
	renameRegistersM instrs

  | P4R_SimdLoad(s_size,s_addr,d) ->
      allocRegM d >>= fun d' ->
	addInstrM (P4R_SimdLoad(s_size,s_addr,d')) >>
	  renameRegistersM instrs

  | P4R_SimdStore(s,d_size,d_addr) ->
      getSubstM s >>= fun s' ->
	addInstrM (P4R_SimdStore(s',d_size,d_addr)) >>
	  regMaybeDiesM s s' instrs >>
	    renameRegistersM instrs

  | P4R_SimdLoad1(s_addr,d_pos,d) ->
      fetchSubstsM >>= fun m -> 
 	(if RSimdRegMap.mem d m then 
	   unitM (RSimdRegMap.find d m)
	 else
	   allocRegM d) >>= fun d' ->		(* that may not be necessary! *)
	addInstrM (P4R_SimdLoad1(s_addr,d_pos,d')) >>
	  renameRegistersM instrs

  | P4R_SimdStore1(s_pos,s,d_addr) ->
      getSubstM s >>= fun s' ->
	addInstrM (P4R_SimdStore1(s_pos,s',d_addr)) >>
	  regMaybeDiesM s s' instrs >>
	    renameRegistersM instrs

  | P4R_SimdSpill(s,d_cellidx) ->
      getSubstM s >>= fun s' ->
        addInstrM (P4R_SimdSpill(s',d_cellidx)) >>
	  regMaybeDiesM s s' instrs >>
	    renameRegistersM instrs

  | P4R_SimdBinOp(op,s,sd) ->
      getSubstM s  >>= fun s' ->
      getSubstM sd >>= fun sd' ->
	addInstrM (P4R_SimdBinOp(op,s',sd')) >>
	  regMaybeDiesM s s' instrs >>
	    renameRegistersM instrs

  | P4R_SimdBinOpMem(op,s_memop,sd) ->
      getSubstM sd >>= fun sd' ->
        addInstrM (P4R_SimdBinOpMem(op,s_memop,sd')) >>
	  renameRegistersM instrs

  | P4R_SimdUnaryOp(op,sd) ->
      getSubstM sd >>= fun sd' ->
        addInstrM (P4R_SimdUnaryOp(op,sd')) >>
	  renameRegistersM instrs

  | P4R_SimdCpyUnaryOpMem(op,memop,d) ->
      let (pre,(t,cplen,instr),post) = optionToValue (nextReader d instrs) in
	(match instr with
	   | P4R_SimdBinOp(op,s,sd) when s=d &&	availableReg s post ->
	       let _ = debugOutputString "ld+binop ==> binopmem (1)" in
	       let instr' = P4R_SimdBinOpMem(op,memop,sd) in
		 renameRegistersM (rev_append pre ((t,cplen,instr')::post))

	   | P4R_SimdBinOp(op,s,sd)
	     when sd=d && p4simdbinopIsCommutative op && availableReg s post ->
	       let _ = debugOutputString "ld+binop ==> binopmem (2)" in
	       let instr' = P4R_SimdBinOpMem(p4simdbinopToCommutativeCounterpart op,memop,s)
	       and post'  = map (swap2regs s sd) post in
		 renameRegistersM (rev_append pre ((t,cplen,instr')::post'))

	   | _ ->
	       allocRegM d >>= fun d' ->
		 addInstrM (P4R_SimdCpyUnaryOpMem(op,memop,d')) >>
		   renameRegistersM instrs)

  | P4R_SimdCpyUnaryOp(P4_FPId,s,d) when s=d ->
      let _ = debugOutputString "useless copy elimination (1)" in
	renameRegistersM instrs

  | P4R_SimdCpyUnaryOp(P4_FPId,s,d) when availableReg s instrs ->
      let _ = debugOutputString "useless copy elimination (2)" in
      getSubstM s >>= fun s' ->
	filterSubstM s >>
	addSubstM d s' >>
	  renameRegistersM instrs

  | P4R_SimdCpyUnaryOp(op,s,d) ->
      getSubstM s >>= fun s' ->
      allocRegM d >>= fun d' ->
	addInstrM (P4R_SimdCpyUnaryOp(op,s',d')) >> 
	  renameRegistersM instrs


(* EXPORTED FUNCTIONS ******************************************************)

let p4rinstrsToRegisterReallocated annotatedinstrs = 
  runM renameRegistersM annotatedinstrs (RSimdRegMap.empty, p4rmmxregs, [])

