Dev.Poga

A Taste of OCaml's Predictable Performance

Learn OCaml the Hard Way is a series about learning OCaml from the ground up:


eqaf, a constant-time compare function implementation in OCaml, is a great case to demonstrate the predictable performance of OCaml’s compiler. Why?


This is the first issue of Learn OCaml the Hard Way. Subscribe to get notified when a new article is out.


About Garbage Collection

It’s hard to have a stable runtime performance for languages that comes with a garbage collector. To have a predictable runtime performance characteristic in these languages, the following rules are usually required to be followed religiously:

Hence, the resulting code are usually unidiomatic and hard to maintain.

Compiler Explorer

Compiler Explorer is a handy tool for exploring compilers and its assembly output. It has built-in OCaml support and removes boilerplate automatically. I recommend you to test the following example with it.


Code

Here’s the implementation of eqaf’s equal.

let[@inline] get x i = String.unsafe_get x i |> Char.code

external unsafe_get_int16 : string -> int -> int = "%caml_string_get16u"
let[@inline] get16 x i = unsafe_get_int16 x i

let equal ~ln a b =
  let l1 = ln asr 1 in
  let r = ref 0 in
  for i = 0 to pred l1 do r := !r lor (get16 a (i * 2) lxor get16 b (i * 2)) done ;
  for _ = 1 to ln land 1 do r := !r lor (get a (ln - 1) lxor get b (ln - 1)) done ;
  !r = 0

let equal a b =
  let al = String.length a in
  let bl = String.length b in
  if al <> bl
  then false
  else equal ~ln:al a b

Besides basic OCaml syntaxes[1], interesting bits in the example are:

Although the OCaml code is quite low-level (and not very functional). It’s still cleaner than most constant-time compare functions implemented in assembly.

Read the asm

Here’s the resulting assembly of the previous example, copied from the eqaf source. You can get the same output via Compiler Explorer.

let[@inline] get x i = String.unsafe_get x i |> Char.code
(* XXX(dinosaure): we use [unsafe_get] to avoid jump to exception:
        sarq    $1, %rbx
        movzbq  (%rax,%rbx), %rax
        leaq    1(%rax,%rax), %rax
        ret
*)
external unsafe_get_int16 : string -> int -> int = "%caml_string_get16u"
let[@inline] get16 x i = unsafe_get_int16 x i
(* XXX(dinosaure): same as [unsafe_get] but for [int16]:
        sarq    $1, %rbx
        movzwq  (%rax,%rbx), %rax
        leaq    1(%rax,%rax), %rax
        ret
*)
let equal ~ln a b =
  let l1 = ln asr 1 in
  (*
        sarq    $1, %rcx
        orq     $1, %rcx
  *)
  let r = ref 0 in
  (*
        movq    $1, %rdx
  *)
  for i = 0 to pred l1 do r := !r lor (get16 a (i * 2) lxor get16 b (i * 2)) done ;
  (*
        movq    $1, %rsi
        addq    $-2, %rcx
        cmpq    %rcx, %rsi
        jg      .L104
.L105:
        leaq    -1(%rsi,%rsi), %r8
        sarq    $1, %r8
        movzwq  (%rdi,%r8), %r9
        leaq    1(%r9,%r9), %r9
        movzwq  (%rbx,%r8), %r8
        leaq    1(%r8,%r8), %r8
     // [unsafe_get_int16 a i] and [unsafe_get_int6 b i]
        xorq    %r9, %r8
        orq     $1, %r8
        orq     %r8, %rdx
        movq    %rsi, %r8
        addq    $2, %rsi
        cmpq    %rcx, %r8
        jne     .L105
.L104:
  *)
  for _ = 1 to ln land 1 do r := !r lor (get a (ln - 1) lxor get b (ln - 1)) done ;
  (*
        movq    $3, %rsi
        movq    %rax, %rcx
        andq    $3, %rcx
        cmpq    %rcx, %rsi
        jg      .L102
.L103:
        movq    %rax, %r8
        addq    $-2, %r8
        sarq    $1, %r8
        movzbq  (%rdi,%r8), %r9
        leaq    1(%r9,%r9), %r9
        movzbq  (%rbx,%r8), %r8
        leaq    1(%r8,%r8), %r8
     // [unsafe_get a i] and [unsafe_get b i]
        xorq    %r9, %r8
        orq     $1, %r8
        orq     %r8, %rdx
        movq    %rsi, %r8
        addq    $2, %rsi
        cmpq    %rcx, %r8
        jne     .L103
.L102:
  *)
  !r = 0
(*
        cmpq    $1, %rdx
        sete    %al
        movzbq  %al, %rax
        leaq    1(%rax,%rax), %rax
        ret
*)

Each OCaml code is compiled to a cleanly separated section of assembly, The resulting assembly contains no garbage collection, no types information, and no advanced features but still pretty readable.


Conclusion

eqaf allows us to have a basic understanding about OCaml’s assembly output without inferences of GC, boxed values, and advanced syntaxes such as pattern matching. We will explore more in the future issues.

Like this post? Subscribe to Learn OCaml the Hard Way to get more!

References

  1. For understanding OCaml’s syntax (and everything OCaml), Real World OCaml does way better job than I can.

#OCaml #Compiler #programming #Learn OCaml the Hard Way