Some Examples of Kotlin

1 Basic Syntax

// Lists: `listOf` builds a read-only List<Int> holding the integers 1 through 5.

val list: List<Int> = listOf(1, 2, 3, 4, 5)

list
[1, 2, 3, 4, 5]

We create a read-only list of integers. Lists are generic; here the list has type List<Int>. List is a Kotlin collection type, and it comes with many higher-order functions that support functional-style collection processing.

// Anonymous function (lambda), written out in full:
// both the function type of the variable, (Int) -> Int, and the type of the
// parameter, x:Int, are declared explicitly.
// The last expression in the body (x + 1) is the value the function returns.
val inc: (Int) -> Int = { x:Int ->
    x + 1
}
inc
(kotlin.Int) -> kotlin.Int

We can create anonymous functions. The full syntax is:

{ x:<type>, y:<type> -> 
  ... 
}
inc(10)
11
// Type signatures can be inferred:
// the parameter type (x:Int) is declared, so the compiler works out that
// inc has type (Int) -> Int without an annotation on the val.
val inc = { x:Int -> x + 1 }
inc
(kotlin.Int) -> kotlin.Int

Here we are relying on the type signature of the parameters to infer the type signature of the symbol inc.

inc(10)
11
// Inference in the other direction:
// the declared type (Int)->Int tells the compiler that the parameter x is an Int,
// so x needs no type annotation of its own.
val inc:(Int)->Int = { 
    x -> x+1
}
inc
(kotlin.Int) -> kotlin.Int

We can also use the type signature of the symbol to infer the type signature(s) of the parameters of the anonymous function.

inc(10)
11
// Kotlin provides a more succinct syntax if there is only one parameter:
// the parameter declaration is dropped and the argument is referred to as `it`.
val inc:(Int)->Int = { it + 1 }

Kotlin provides idiomatic syntactic sugar for this case: when an anonymous function has exactly one parameter and its type can be inferred, the parameter can be referred to by the implicit name it, so we don't need a parameter declaration.

inc(10)
11
// Higher-order function over a list:
// map calls inc on every element and returns the results as a new list.

list.map(inc)
[2, 3, 4, 5, 6]

List<T>.map: (T->S)->List<S>

Therefore, list.map(f) will apply f to each element in the list.

// Syntactic sugar:
// if the anonymous function is written inline as the only argument,
// the parentheses around it can be omitted.

list.map {
    x -> x * 2
}
[2, 4, 6, 8, 10]

The anonymous function syntax allows easy composition with higher-order functions.

// More syntactic sugar:
// with a single parameter, skip its declaration and use the implicit `it` parameter.

list.map { it * 20 }
[20, 40, 60, 80, 100]

Using the implicit it parameter, composing anonymous code blocks with higher-order functions is even easier.

2 Loading data

import java.io.File
val dataFile = "/home/jovyan/public/datasets/my_BankerChurners.csv"

// Load the whole CSV file into memory, one list element per line of text.
// readLines() opens the file, collects every line and closes the file again.
val lines: List<String> = File(dataFile).readLines()
lines.size
10128
lines[0]
"CLIENTNUM","Attrition_Flag","Customer_Age","Gender","Dependent_count","Education_Level","Marital_Status","Income_Category","Card_Category","Months_on_book","Total_Relationship_Count","Months_Inactive_12_mon","Contacts_Count_12_mon","Credit_Limit","Total_Revolving_Bal","Avg_Open_To_Buy","Total_Amt_Chng_Q4_Q1","Total_Trans_Amt","Total_Trans_Ct","Total_Ct_Chng_Q4_Q1","Avg_Utilization_Ratio","Naive_Bayes_Classifier_Attrition_Flag_Card_Category_Contacts_Count_12_mon_Dependent_count_Education_Level_Months_Inactive_12_mon_1","Naive_Bayes_Classifier_Attrition_Flag_Card_Category_Contacts_Count_12_mon_Dependent_count_Education_Level_Months_Inactive_12_mon_2"
// Let's look at the first 3 lines
lines.take(3)
["CLIENTNUM","Attrition_Flag","Customer_Age","Gender","Dependent_count","Education_Level","Marital_Status","Income_Category","Card_Category","Months_on_book","Total_Relationship_Count","Months_Inactive_12_mon","Contacts_Count_12_mon","Credit_Limit","Total_Revolving_Bal","Avg_Open_To_Buy","Total_Amt_Chng_Q4_Q1","Total_Trans_Amt","Total_Trans_Ct","Total_Ct_Chng_Q4_Q1","Avg_Utilization_Ratio","Naive_Bayes_Classifier_Attrition_Flag_Card_Category_Contacts_Count_12_mon_Dependent_count_Education_Level_Months_Inactive_12_mon_1","Naive_Bayes_Classifier_Attrition_Flag_Card_Category_Contacts_Count_12_mon_Dependent_count_Education_Level_Months_Inactive_12_mon_2", 768805383,"Existing Customer",45,"M",3,"High School","Married","$60K - $80K","Blue",39,5,1,3,12691,777,11914,1.335,1144,42,1.625,0.061,9.3448e-05,0.99991, 818770008,"Existing Customer",49,"F",5,"Graduate","Single","Less than $40K","Blue",44,6,1,2,8256,864,7392,1.541,1291,33,3.714,0.105,5.6861e-05,0.99994]
// Let's focus on just a single line and perform some parsing.
// lines[0] is the CSV header, so index 3 is the third data row.
val line = lines[3]
line
713982108,"Existing Customer",51,"M",3,"Graduate","Married","$80K - $120K","Blue",36,4,1,0,3418,0,3418,2.594,1887,20,2.333,0,2.1081e-05,0.99998
// This is the syntactic domain analysis
line.split(',')
[713982108, "Existing Customer", 51, "M", 3, "Graduate", "Married", "$80K - $120K", "Blue", 36, 4, 1, 0, 3418, 0, 3418, 2.594, 1887, 20, 2.333, 0, 2.1081e-05, 0.99998]

3 Semantic Parsing

// Switch to semantic domain

/**
 * Gender of a customer in the semantic (typed) domain.
 *
 * Declares four constants, in this order:
 * [MALE], [FEMALE], [NON_BINARY] and [UNKNOWN].
 */
enum class Gender { MALE, FEMALE, NON_BINARY, UNKNOWN }

/**
 * One customer record, reduced to the fields kept for analysis.
 *
 * @property age the customer's age
 * @property gender the customer's gender
 * @property limit the customer's credit limit
 */
data class Customer(var age: Int, var gender: Gender, var limit: Float)
// Extending String so it can parse itself to a Gender value

/**
 * Parses this string into a [Gender] by looking only at its first character,
 * ignoring case: 'M' -> MALE, 'F' -> FEMALE, 'N' -> NON_BINARY.
 *
 * Any other first character, and the empty string, map to [Gender.UNKNOWN].
 */
fun String.toGender(): Gender {
    // firstOrNull() instead of [0]: an empty field falls through to UNKNOWN
    // rather than throwing StringIndexOutOfBoundsException.
    return when (this.uppercase().firstOrNull()) {
        'M' -> Gender.MALE
        'F' -> Gender.FEMALE
        'N' -> Gender.NON_BINARY
        else -> Gender.UNKNOWN
    }
}
Pair("M".toGender(), "F".toGender())
(MALE, FEMALE)
// Figuring out the offsets

/**
 * Zero-based positions of the age, gender and credit-limit columns within a CSV row.
 *
 * When built from a header line, a position of -1 means that column name
 * was not present in the header.
 */
class CSVColumnPositions(var age: Int, var gender: Int, var limit: Int) {

    /** Looks the three positions up by column name in a comma-separated [header] line. */
    constructor(header: String) : this(-1, -1, -1) {
        // Header names may be wrapped in double quotes; strip them before matching.
        val names = header.split(',').map { name -> name.removeSurrounding("\"") }
        age = names.indexOf("Customer_Age")
        gender = names.indexOf("Gender")
        limit = names.indexOf("Credit_Limit")
    }

    override fun toString(): String = "age@${age}, gender@${gender}, limit@${limit}"
}
val columnPositions = CSVColumnPositions(lines[0])
columnPositions
age@2, gender@3, limit@13
// Now we can parse each line into customer records

/**
 * Parses this CSV data row into a [Customer].
 *
 * Fields are separated by commas. A field may be wrapped in double quotes, in
 * which case commas inside the quotes belong to the field and a doubled quote
 * stands for one literal quote. A quoted value that contains a comma therefore
 * no longer shifts the columns that follow it.
 *
 * @param pos zero-based positions of the age, gender and credit-limit columns
 * @return the customer described by this row
 * @throws NumberFormatException if the age or limit field is not a valid number
 * @throws IndexOutOfBoundsException if a position lies outside the row
 */
fun String.toCustomer(pos: CSVColumnPositions): Customer {
    // Splits a row on the commas that sit outside double quotes and drops the
    // enclosing quotes from each field.
    fun splitFields(row: String): List<String> {
        val fields = mutableListOf<String>()
        val current = StringBuilder()
        var inQuotes = false
        var i = 0
        while (i < row.length) {
            val ch = row[i]
            when {
                // "" inside a quoted field is an escaped quote character.
                ch == '"' && inQuotes && i + 1 < row.length && row[i + 1] == '"' -> {
                    current.append('"')
                    i++
                }
                ch == '"' -> inQuotes = !inQuotes
                ch == ',' && !inQuotes -> {
                    fields.add(current.toString())
                    current.setLength(0)
                }
                else -> current.append(ch)
            }
            i++
        }
        // The last field has no trailing comma to terminate it.
        fields.add(current.toString())
        return fields
    }

    val parts = splitFields(this)
    val age = parts[pos.age].toInt()
    val gender = parts[pos.gender].toGender()
    val limit = parts[pos.limit].toFloat()
    return Customer(age, gender, limit)
}
lines[1].toCustomer(columnPositions)
Customer(age=45, gender=MALE, limit=12691.0)

4 Functional Data Analytics

// Column positions are resolved from the header row (lines[0]).
val positions = CSVColumnPositions(lines[0])

// Parse every data row (everything after the header), then print the first
// five customers, each prefixed with its zero-based index.
lines
    .drop(1)
    .map { row -> row.toCustomer(positions) }
    .take(5)
    .withIndex()
    .forEach { (index, customer) -> println("$index: $customer") }
0: Customer(age=45, gender=MALE, limit=12691.0)
1: Customer(age=49, gender=FEMALE, limit=8256.0)
2: Customer(age=51, gender=MALE, limit=3418.0)
3: Customer(age=40, gender=FEMALE, limit=3313.0)
4: Customer(age=40, gender=MALE, limit=4716.0)
// Count the customers per gender; each group is rendered as "<gender>, <count>".
lines
    .drop(1)
    .map { row -> row.toCustomer(positions) }
    .groupBy { customer -> customer.gender }
    .map { (gender, customers) -> "$gender, ${customers.size}" }
[MALE, 4769, FEMALE, 5358]
/**
 * Running credit-limit statistics for a group of customers.
 *
 * @property count number of customers added so far
 * @property limit sum of the credit limits added so far
 * @property mean as computed by [plus]: the updated sum divided by the updated count
 *
 * NOTE(review): the sum is accumulated in a Float; for large groups the total
 * may exceed what a Float can represent exactly — confirm the precision is acceptable.
 */
data class Stats(
    val count: Int = 0,
    val limit: Float = 0f,
    val mean: Float = 0f,
) {
    /** Returns a new [Stats] that also accounts for [customer]; this instance is not modified. */
    operator fun plus(customer: Customer): Stats {
        val newCount = count + 1
        val newLimit = limit + customer.limit
        return Stats(count = newCount, limit = newLimit, mean = newLimit / newCount)
    }

    companion object {
        /** Starting value for a fold: no customers, zero sum, zero mean. */
        fun empty(): Stats = Stats()
    }
}

// Per gender: fold the group's customers into a Stats value, then print the
// count together with the credit-limit sum and mean rounded to whole numbers.
lines
    .drop(1)
    .map { row -> row.toCustomer(positions) }
    .groupBy { customer -> customer.gender }
    .map { (gender, members) ->
        val totals = members.fold(Stats.empty()) { acc, member -> acc + member }
        val count = totals.count
        val limit = totals.limit.roundToLong()
        val mean = totals.mean.roundToLong()
        "(${gender}: count=${count}, limit=${limit}, mean=$mean)"
    }
    .forEach { summary -> println(summary) }
(MALE: count=4769, limit=60498088, mean=12686)
(FEMALE: count=5358, limit=26917708, mean=5024)