A Haskell Compiler

David Terei

Why understand how GHC works?

The pipeline of GHC

Haskell -> GHC Haskell -> Core -> STG -> Cmm -> Assembly

GHC supports Haskell on top of an unsafe variant

Primitive types (GHC.Prim):

All primitive types are unlifted – can’t contain .

ghci> :browse GHC.Prim

GHC supports Haskell on top of an unsafe variant

All variants of Int (In8, Int16, Int32, Int64) are represented internally by Int# (64bit) on a 64bit machine.

data Int32 = I32# Int# deriving (Eq, Ord, Typeable)

instance Num Int32 where
    (I32# x#) + (I32# y#)  = I32# (narrow32Int# (x# +# y#))
    ...

Data constructors lift a type, allowing .

GHC implements IO through the RealWorld token

newtype IO a = IO (State# RealWorld -> (# State# RealWorld, a #))

returnIO :: a -> IO a
returnIO x = IO $ \ s -> (# s, x #)

bindIO :: IO a -> (a -> IO b) -> IO b
bindIO (IO m) k = IO $ \ s -> case m s of (# new_s, a #) -> unIO (k a) new_s

We’ve seen RealWorld Before

comp :: Handle -> IO ()
comp = do name <- hGetLine h
          hPutStrLn h name
comp :: GHC.IO.Handle.Types.Handle -> GHC.Prim.State# GHC.Prim.RealWorld
        -> (# GHC.Prim.State# GHC.Prim.RealWorld, () #)
comp = \h rw1 ->
  case GHC.IO.Handle.Text.hGetLine h rw1 of
    (# rw2, str #) -> GHC.IO.Handle.Text.hPutStr h str rw2

We implement unsafe IO operations by throwing away the world

unsafePerformIO :: IO a -> a
unsafePerformIO m = unsafeDupablePerformIO (noDuplicate >> m)

unsafeDupablePerformIO  :: IO a -> a
unsafeDupablePerformIO (IO m) = lazy (case m realWorld# of (# _, r #) -> r)

Core: a small function intermediate language

variables, literals, let, case, lambda abstraction, application
ghc -ddump-simpl M.hs > M.core

Core in one slide

data Expr b -- "b" for the type of binders,
  = Var    Id
  | Lit   Literal
  | App   (Expr b) (Arg b)
  | Lam   b (Expr b)
  | Let   (Bind b) (Expr b)
  | Case  (Expr b) b Type [Alt b]

  | Type  Type
  | Cast  (Expr b) Coercion
  | Coercion Coercion

  | Tick  (Tickish Id) (Expr b)

data Bind b = NonRec b (Expr b)
            | Rec [(b, (Expr b))]

type Arg b = Expr b

type Alt b = (AltCon, [b], Expr b)

data AltCon = DataAlt DataCon | LitAlt  Literal | DEFAULT

Lets now look at how Haskell is compiled to Core.

GHC Haskell to Core: monomorphic functions

Haskell

idChar :: Char -> Char
idChar c = c

Core

idChar :: GHC.Types.Char -> GHC.Types.Char
[GblId, Arity=1]
idChar = \ (c :: GHC.Types.Char) -> c

GHC Haskell to Core: polymorphic functions

Haskell

id :: a -> a
id x = x

idChar2 :: Char -> Char
idChar2 = id

Core

id :: forall a. a -> a
id = \ (@ a) (x :: a) -> x

idChar2 :: GHC.Types.Char -> GHC.Types.Char
idChar2 = id @ GHC.Types.Char

GHC Haskell to Core: polymorphic functions

Haskell

map :: (a -> b) -> [a] -> [b]
map _ []     = []
map f (x:xs) = f x : map f xs

Core

map :: forall a b. (a -> b) -> [a] -> [b]
map = \ (@ a) (@ b) (f :: a -> b) (xs :: [a]) ->
    case xs of _
      []     -> GHC.Types.[] @ b
      : y ys -> GHC.Types.: @ b (f y) (map @ a @ b f ys)

New case syntax to make obvious that evaluation is happening:

case e of result
  __DEFAULT -> result

Where transformed to let

Haskell

dox :: Int -> Int
dox n = x * x
    where x = n + 2

Core

dox :: GHC.Types.Int -> GHC.Types.Int
dox = \ (n :: GHC.Types.Int) ->
    let x :: GHC.Types.Int
        x = GHC.base.plusInt n (GHC.Types.I# 2)
    in GHC.base.multInt x x

Patterns matching transformed to case statements

Haskell

iff :: Bool -> a -> a -> a
iff True  x _ = x
iff False _ y = y

Core

iff :: forall a. GHC.Bool.Bool -> a -> a -> a
iff = \ (@ a) (d :: GHC.Bool.Bool) (x :: a) (y :: a) ->
    case d of _
      GHC.Bool.False -> y
      GHC.Bool.True  -> x

Type classes transformed to dictionaries

Haskell

typeclass MyEnum a where
   toId  :: a -> Int
   fromId :: Int -> a

Core

data MyEnum a = DMyEnum (a -> Int) (Int -> a)

toId :: forall a. MyEnum a -> a -> GHC.Types.Int
toId = \ (@ a) (d :: MyEnum a) (x :: a) ->
    case d of _
      DMyEnum f1 _ -> f1 x

fromId :: forall a. MyEnum a -> GHC.Types.Int -> a
fromId = \ (@ a) (d :: MyEnum a) (x :: a) ->
    case d of _
      DMyEnum _ f2 -> f2 x

A dictionary constructed for each instance

Haskell

instance MyEnum Int where
   toId = id
   fromId = id

Core

fMyEnumInt :: MyEnum GHC.Types.Int
fMyEnumInt =
    DMyEnum @ GHC.Types.Int
      (id @ GHC.Types.Int)
      (id @ GHC.Types.Int)

Dictionaries constructed from dictionaries

Haskell

instance (MyEnum a) => MyEnum (Maybe a) where
  toId (Nothing) = 0
  toId (Just n)  = toId n
  fromId 0       = Nothing
  fromId n       = Just $ fromId n

Core

fMyEnumMaybe :: forall a. MyEnum a -> MyEnum (Maybe a)
fMyEnumMaybe = \ (@ a) (dict :: MyEnum a) ->
  DMyEnum @ (Maybe a)
    (fMyEnumMaybe_ctoId @ a dict)
    (fMyEnumMaybe_cfromId @ a dict)

fMyEnumMaybe_ctoId :: forall a. MyEnum a -> Maybe a -> Int
fMyEnumMaybe_ctoId = \ (@ a) (dict :: MyEnum a) (mx :: Maybe a) ->
  case mx of _
    Nothing -> I# 0
    Just n  -> case (toId @ a dict n) of _ { I# y -> I# (1 +# y) }

UNPACK unboxes types

Haskell

data Point = Point {-# UNPACK #-} !Int
                   {-# UNPACK #-} !Int

Core

data Point = Point Int# Int#

UNPACK not always a good idea

Haskell

addP :: P -> Int
addP (P x y ) = x + y

Core

addP :: P -> Int
addP = \ (p :: P) ->
    case p of _
      P x y -> case +# x y of z
        __DEFAULT -> I# z

UNPACK not always a good idea

Haskell

module M where

{-# NOINLINE add #-}
add x y = x + y

module P where

addP_bad (P x y) = add x y

Core

addP_bad = \ (p :: P) ->
    case p of _
      P x y ->
        let x' = I# x
            y' = I# y
        in M.add x' y'

Core summary

Middle of GHC: Core -> Core

A lot of the optimizations that GHC does is through core to core transformations.

Lets look at two of them:

Fun Fact: Estimated that functional languages gain 20 - 40%
improvement from inlining Vs. imperative languages which gain 10 - 15%

Strictness & unboxing

Consider this factorial implementation in Haskell:

fac :: Int -> Int -> Int
fac x 0 = a
fac x n = fac (n*x) (n-1)

Strictness & unboxing

Core

fac :: Int -> Int -> Int
fac = \ (x :: Int) (n :: Int) ->
    case n of _
      I# n# -> case n# of _
                0#        -> x
                __DEFAULT -> let one = I# 1
                                 n' = n - one
                                 x' = n * x
                             in  fac x' n'

GHC with strictness analysis

Compile fac with optimizations.

$wfac :: Int# -> Int# -> Int#
$wfac = \ x# n# ->
    case n# of _
      0# -> x#
      _  -> case (n# -# 1#) of n'#
              _ -> case (n# *# x#) of x'#
                     _ -> $wfac x'# n'#

fac :: Int -> Int -> Int
fac = \ a n ->
    case a of
      I# a# -> case n of
                 I# n# -> case ($wfac a# n#) of
                            r# -> I# r#

SpecConstr: Extending strictness analysis to paths

The idea of the SpecConstr pass is to extend the strictness and unboxing from before but to functions where arguments aren’t strict in every code path.

Consider this Haskell function:

drop :: Int -> [a] -> [a]
drop n []     = []
drop 0 xs     = xs
drop n (x:xs) = drop (n-1) xs

SpecConstr: Extending strictness analysis to paths

So we get this code without extra optimization:

drop n xs = case xs of
  []     -> []
  (y:ys) -> case n of
              I# n# -> case n# of
                          0 -> []
                          _ -> let n' = I# (n# -# 1#)
                               in drop n' ys

SpecConstr

The SpecConstr pass takes advantage of this to create a specialised version of drop that is only called after we have passed the first check.

-- works with unboxed n
drop' n# xs = case xs of
               []     -> []
               (y:ys) -> case n# of
                           0# -> []
                           _  -> drop' (n# -# 1#) xs

drop n xs = case xs of
              []     -> []
              (y:ys) -> case n of
                          I# n# -> case n# of
                                      0 -> []
                                      _ -> drop' (n# -# 1#) xs

STG code

ghc -ddump-stg A.hs > A.stg

STG code

Haskell

map :: (a -> b) -> [a] -> [b]
map f []     = []
map f (x:xs) = f x : map f xs

STG

map :: forall a b. (a -> b) -> [a] -> [b]
map = \r [f xs]
        case xs of _
          []     -> [] []
          : z zs -> let bds = \u [] map f zs
                        bd  = \u [] f z
                    in : [bd bds]

Graph reduction as a computational model for Haskell

Graph reduction is a good computational model for lazy functional languages.

f g = let x = 2 + 2
      in (g x, x)

Graph reduction as a computational model for Haskell

Graph reduction is a good computational model for lazy functional languages.

f g = let x = 2 + 2
      in (g x, x)

Graph reduction as a computational model for Haskell

Graph reduction is a good computational model for lazy functional languages.

Can think of your Haskell program as progressing by either adding new nodes to the graph or reducing existing ones.

GHC execution model

Closure representation

Closure Info Table

Data closure

data G = G (Int -> Int) {-# UNPACK #-} !Int
jmp Sp[0]

Function closures

f = \x -> let g = \y -> x + y
          in g x

Partial application closures (PAP)

foldr (:)

Thunk closures

range = [1..100]

Calling convention

Handling thunk updates

Handling thunk updates

mk :: Int -> Int
mk x = x + 1
// thunk entry - setup stack, evaluate x
mk_entry()
    entry:
        if (Sp - 24 < SpLim) goto gc;       // check for enough stack space

        I64[Sp - 16] = stg_upd_frame_info;  // setup update frame (closure type)
        I64[Sp -  8] = R1;                  // set thunk to be updated (payload)

        I64[Sp - 24] = mk_exit;             // setup continuation (+)

        Sp = Sp - 24;                       // decrease stack
        R1 = I64[R1 + 8];                   // grab 'x' from environment
        jump I64[R1] ();                    // eval 'x'

    gc: jump stg_gc_enter_1 ();

Handling thunk updates

mk :: Int -> Int
mk x = x + 1
// thunk exit - setup value on heap, tear-down stack
mk_exit()
    entry:
        Hp = Hp + 16;
        if (Hp > HpLim) goto gc;

        v::I64 = I64[R1] + 1;               // perform ('x' + 1)

        I64[Hp - 8] = GHC_Types_I_con_info; // setup Int closure
        I64[Hp + 0] = v::I64;

        R1 = Hp;                            // point R1 to computed thunk value
        Sp = Sp + 8;                        // pop stack
        jump (I64[Sp + 0]) ();              // jump to continuation ('stg_upd_frame_info')

    gc: HpAlloc = 16;
        jump stg_gc_enter_1 ();

stg_upd_frame_info code updates a thunk with its value

stg_upd_frame_info code updates a thunk with its value

Thunk closure:

Avoiding entering values with pointer tagging

Avoiding entering values

Our example code from before:

mk :: Int -> Int
mk x = x + 1

Changes with pointer tagging:

mk_entry()
    entry:
         ...
         R1 = I64[R1 + 16];          // grab 'x' from environment
         if (R1 & 7 != 0) goto cxd;  // check if 'x' is eval'd
         jump I64[R1] ();            // not eval'd so eval
    cxd: jump mk_exit ();            // 'x' eval'd so jump to (+) continuation
}

mk_exit()
    cx0:
        I64[Hp - 8] = ghczmprim_GHCziTypes_Izh_con_info; // setup Int closure
        I64[Hp + 0] = v::I64;               // setup Int closure
        R1 = Hp - 7;                        // point R1 to computed thunk value (with tag)
        ...
}

Pointer tagging makes your own data types efficient

data MyBool a = MTrue a | MFalse a

Pointer tagging avoids looking up the info table

Haskell

mycase :: Maybe Int -> Int
mycase x = case x of Just z -> z; Nothing -> 10

Cmm

mycase_entry()                          // corresponds to forcing 'x'
    entry:
        R1 = R2;                        // R1 = 'x'
        I64[Sp - 8] = mycase_exit;      // setup case continuation
        Sp = Sp - 8;
        if (R1 & 7 != 0) goto crL;      // check pointer tag to see if x eval'd
        jump I64[R1] ();                // x not eval'd, so eval
    exit:
        jump mycase_exit ();            // jump to case continuation

mycase_exit()                           // case continuation
    entry:
        v::I64 = R1 & 7;                // get tag bits of 'x' and put in local variable 'v'
        if (_crD::I64 >= 2) goto crE;   // can use tag bits to check which constructor we have
        R1 = stg_INTLIKE_closure+417;   // 'Nothing' case
        Sp = Sp + 8;
        jump (I64[Sp + 0]) ();          // jump to continuation ~= return
    exit:
        R1 = I64[R1 + 6];               // get 'z' thunk inside Just
        Sp = Sp + 8;
        R1 = R1 & (-8);                 // clear tags on 'z'
        jump I64[R1] ();                // force 'z' thunk

Bringing it all home

No lecture on Compilers is complete without assembly code!

add :: Int -> Int -> Int
add x y = x + y + 2
A_add_info:
.LcvZ:
    leaq -16(%rbp),%rax
    cmpq %r15,%rax
    jb .Lcw1
    movq %rsi,-8(%rbp)
    movq %r14,%rbx
    movq $sul_info,-16(%rbp)
    addq $-16,%rbp
    testq $7,%rbx
    jne sul_info
    jmp *(%rbx)
.Lcw1:
    movl $A_add_closure,%ebx
    jmp *-8(%r13)

sul_info:
.LcvS:
    movq 8(%rbp),%rax
    movq 7(%rbx),%rcx
    movq %rcx,8(%rbp)
    movq %rax,%rbx
    movq $suk_info,0(%rbp)
    testq $7,%rbx
    jne suk_info
    jmp *(%rbx)

suk_info:
.LcvK:
    addq $16,%r12
    cmpq 144(%r13),%r12
    ja .LcvP
    movq 7(%rbx),%rax
    addq $2,%rax
    movq 8(%rbp),%rcx
    addq %rax,%rcx
    movq $ghczmprim_GHCziTypes_Izh_con_info,-8(%r12)
    movq %rcx,0(%r12)
    leaq -7(%r12),%rbx
    addq $16,%rbp
    jmp *0(%rbp)
.LcvP:
    movq $16,184(%r13)
.LcvQ:
    jmp *-16(%r13)

Finished!

Resources & References

Here are some resources to learn about GHC, they were also used to create these slides: