Regex negative lookahead with Elixir

Published November 03, 2019 by Toran Billups

Anytime I need to use Regex for something I find myself looking for examples, and often the Elixir-specific results are slim to none for intermediate or advanced topics. So when I recently stumbled into a problem that required a negative lookahead (one of the regex "lookaround" assertions, and not to be confused with its sibling, the lookbehind), I decided it was the perfect opportunity to share a few of the highlights.

If you prefer to skip the story and see the code you can find everything on github

The problem

Before we dive into the regular expression code I first wanted to outline the problem I had planned to solve. The tests below describe a program that looks through a list of usernames and suggests an alternative when the desired username is already in use.

    defmodule Regexx.UsernameTest do
      use ExUnit.Case, async: false

      alias Regexx.Username

      @username "foobarbaz"

      # With nothing taken yet, the requested name is handed back untouched.
      test "returns original username when all usernames is empty list" do
        assert Username.suggest([], @username) == @username
      end

      # A single collision yields the first numbered alternative.
      test "returns alternative when username found in all usernames list" do
        taken = [@username]

        assert Username.suggest(taken, @username) == "#{@username}-1"
      end

      # When the "-1" alternative is taken as well, the next number is used.
      test "returns alternative when both first and second options are taken" do
        taken = ["zipzapzoom", @username, "#{@username}-1"]

        assert Username.suggest(taken, @username) == "#{@username}-2"
      end
    end
  

The solution I put together enumerates the list of all usernames and if the desired username is found I use recursion to suggest another.

    defmodule Regexx.Username do
      @moduledoc """
      Suggests an available username when the desired one is already taken.
      """

      @doc """
      Returns `username` when it is not in `all_usernames`; otherwise returns
      the first `username-N` (N = 1, 2, ...) that is not in the list.

      `suffix` is the number tried on the current attempt. Callers normally
      omit it; the function increments it on each recursive retry.
      """
      def suggest(all_usernames, username, suffix \\ 0) do
        candidate = candidate_for(username, suffix)

        if candidate in all_usernames do
          # Taken: retry with the next number.
          suggest(all_usernames, username, suffix + 1)
        else
          candidate
        end
      end

      # Builds the name to check for the given attempt number.
      defp candidate_for(username, suffix) when suffix > 0 do
        "#{username}-" <> Integer.to_string(suffix)
      end

      defp candidate_for(username, _suffix), do: username
    end
  

Negative Lookahead

The first edge case I found with this test exposed that when a username already had a suffix of `-integer` the program would fail to identify it returning an incorrect username.

    defmodule Regexx.UsernameTest do
      use ExUnit.Case, async: false

      alias Regexx.Username

      @username "foobarbaz"

      # A desired name that already carries a numeric suffix should have that
      # number bumped rather than a second suffix appended.
      test "returns alternative username when original ends with numeric suffix" do
        desired = "#{@username}-998"
        taken = ["zipzapzoom", desired]

        assert Username.suggest(taken, desired) == "#{@username}-999"
      end
    end
  

To solve this I chose to reach for Regex.named_captures because it offered a great way to both conditionally check if username had a suffix and return a match when it did. If the username did have a suffix of some kind it was important to find and replace it properly.

    defmodule Regexx.Username do
      @moduledoc """
      Suggests an available username when the desired one is already taken.
      """

      @doc """
      Returns `username` when it is not in `all_usernames`, otherwise the first
      free alternative.

      `suffix` is the offset tried on the current attempt (callers normally
      omit it). With `suffix == 0` the name is checked as-is. For a positive
      `suffix`:

        * a name that already ends in `-<digits>` has that number increased by
          `suffix`, so `foobarbaz-998` becomes `foobarbaz-999`;
        * any other name gets `-<suffix>` appended.
      """
      def suggest(all_usernames, username, suffix \\ 0) do
        possible_username =
          if suffix > 0 do
            # The group has to be named `suffix` for the map match below. It is
            # restricted to digits so String.to_integer/1 cannot raise on a
            # name such as "foo-a1".
            case Regex.named_captures(~r/.*-(?<suffix>[0-9]+)$/, username) do
              nil ->
                "#{username}-" <> Integer.to_string(suffix)

              %{"suffix" => previous} ->
                next_number = String.to_integer(previous) + suffix

                # Negative lookahead: match `previous` only where no further
                # occurrence follows, i.e. its last occurrence, which is the
                # trailing number. The pattern has no capture group, so "\\1"
                # expands to an empty string and the match is simply removed.
                Regex.replace(~r/#{previous}(?!.*#{previous})/, username, "\\1") <>
                  "#{next_number}"
            end
          else
            username
          end

        if possible_username in all_usernames do
          # Taken: retry with the next offset.
          suggest(all_usernames, username, suffix + 1)
        else
          possible_username
        end
      end
    end
  

The solution I landed on with Regex.replace can seem a little obscure if you are regex illiterate like me so here is another way of looking at it.

`Regex.replace(~r/998(?!.*998)/, "foobarbaz-998", "\\1") <> "999"`

The regular expression `998(?!.*998)` matches the number `998` only when it is not followed, anywhere later in the string, by another `998`; the `(?!...)` part is the negative lookahead. The direction of this lookaround is intentional: it ensures the program will match and replace only the final `998`, the one found at the end of the username.

String.match

The next edge case I found with this test exposed that when a username ends with a dash but no integer we fail to properly suggest an alternative.

    defmodule Regexx.UsernameTest do
      use ExUnit.Case, async: false

      alias Regexx.Username

      @username "foobarbaz"

      # A dangling dash with no number after it should be reused as the
      # separator for the suggested suffix.
      test "returns alternative username when original ends with dash but no integer" do
        desired = "#{@username}-"
        taken = [desired]

        assert Username.suggest(taken, desired) == "#{@username}-1"
      end
    end
  

I decided to use `String.match?` for this problem because you can pass the function a pattern and have the regular expression do the heavy lifting.

    defmodule Regexx.Username do
      @moduledoc """
      Suggests an available username when the desired one is already taken.
      """

      @doc """
      Returns `username` when it is not in `all_usernames`, otherwise the first
      free alternative.

      `suffix` is the offset tried on the current attempt (callers normally
      omit it). With `suffix == 0` the name is checked as-is. For a positive
      `suffix`:

        * a name that already ends in `-<digits>` has that number increased by
          `suffix`, so `foobarbaz-998` becomes `foobarbaz-999`;
        * a name that ends in a bare dash has that dash replaced by
          `-<suffix>`, so `foobarbaz-` becomes `foobarbaz-1`;
        * any other name gets `-<suffix>` appended.
      """
      def suggest(all_usernames, username, suffix \\ 0) do
        possible_username =
          if suffix > 0 do
            # The group has to be named `suffix` for the map match below. It is
            # restricted to digits so String.to_integer/1 cannot raise on a
            # name such as "foo-a1".
            case Regex.named_captures(~r/.*-(?<suffix>[0-9]+)$/, username) do
              nil ->
                if String.match?(username, ~r/.*-$/) do
                  # Negative lookahead: match only the dash that has no other
                  # dash after it (the trailing one) and drop it, so that a
                  # single dash separates the name from the number. The
                  # pattern has no capture group, so "\\1" expands to "".
                  Regex.replace(~r/-(?!.*-)/, username, "\\1") <> "-#{suffix}"
                else
                  "#{username}-" <> Integer.to_string(suffix)
                end

              %{"suffix" => previous} ->
                next_number = String.to_integer(previous) + suffix

                # Same technique: remove the last occurrence of the old number
                # (the trailing one) before appending the new number.
                Regex.replace(~r/#{previous}(?!.*#{previous})/, username, "\\1") <>
                  "#{next_number}"
            end
          else
            username
          end

        if possible_username in all_usernames do
          # Taken: retry with the next offset.
          suggest(all_usernames, username, suffix + 1)
        else
          possible_username
        end
      end
    end
  

The final solution properly handles this odd suffix ending with dash by using another negative lookahead.

`Regex.replace(~r/-(?!.*-)/, "foobarbaz-", "\\1") <> "-1"`

This time around we search for a dash at the end of username to replace it with `-1` before finally returning the correct alternative `foobarbaz-1`

The images above that show a visual match of each regex are freely available at Regular Expressions 101 so be sure to check that site out if you want to experiment more with your own regular expressions. The source code for my Elixir adventure can be found on github


Buy Me a Coffee

Twitter / Github / Email