From 0bb340e96ecbc0665f779a8bda6b3849f687e2da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kuba=20Szczodrzy=C5=84ski?= Date: Wed, 19 Feb 2020 18:58:57 +0100 Subject: [PATCH] [API/Mobidziennik] Implement web attendance scrapper. --- .../edziennik/data/api/Regexes.kt | 17 ++ .../mobidziennik/MobidziennikFeatures.kt | 8 +- .../mobidziennik/data/MobidziennikData.kt | 10 +- .../mobidziennik/data/api/MobidziennikApi.kt | 2 +- .../data/web/MobidziennikWebAttendance.kt | 156 ++++++++++++++++++ .../edziennik/data/api/models/Data.kt | 1 + .../data/api/models/DataRemoveModel.kt | 13 ++ .../szczodrzynski/edziennik/data/db/AppDb.kt | 5 +- .../data/db/migration/Migration77.kt | 19 +++ .../attendance/AttendanceFragment.java | 58 +++---- 10 files changed, 246 insertions(+), 43 deletions(-) create mode 100644 app/src/main/java/pl/szczodrzynski/edziennik/data/api/edziennik/mobidziennik/data/web/MobidziennikWebAttendance.kt create mode 100644 app/src/main/java/pl/szczodrzynski/edziennik/data/db/migration/Migration77.kt diff --git a/app/src/main/java/pl/szczodrzynski/edziennik/data/api/Regexes.kt b/app/src/main/java/pl/szczodrzynski/edziennik/data/api/Regexes.kt index 72df48c7..725d4fc7 100644 --- a/app/src/main/java/pl/szczodrzynski/edziennik/data/api/Regexes.kt +++ b/app/src/main/java/pl/szczodrzynski/edziennik/data/api/Regexes.kt @@ -67,6 +67,23 @@ object Regexes { } + val MOBIDZIENNIK_ATTENDANCE_TABLE by lazy { + """(.+?)
""".toRegex(DOT_MATCHES_ALL) + } + val MOBIDZIENNIK_ATTENDANCE_LESSON_COUNT by lazy { + """rel="([0-9-]{10})" colspan="([0-9]+)"""".toRegex() + } + val MOBIDZIENNIK_ATTENDANCE_ENTRIES by lazy { + """font-size:.+?class=".*?">(.*?)""".toRegex(DOT_MATCHES_ALL) + } + val MOBIDZIENNIK_ATTENDANCE_RANGE by lazy { + """([0-9:]+) - .+? (.+?)""".toRegex(DOT_MATCHES_ALL) + } + val MOBIDZIENNIK_ATTENDANCE_LESSON by lazy { + """(.+?) - (.*?).+?.+?\((.+?), .+?(.+?)\)""".toRegex(DOT_MATCHES_ALL) + } + + val IDZIENNIK_LOGIN_HIDDEN_FIELDS by lazy { """""".toRegex(DOT_MATCHES_ALL) diff --git a/app/src/main/java/pl/szczodrzynski/edziennik/data/api/edziennik/mobidziennik/MobidziennikFeatures.kt b/app/src/main/java/pl/szczodrzynski/edziennik/data/api/edziennik/mobidziennik/MobidziennikFeatures.kt index b96210fb..d056fc4b 100644 --- a/app/src/main/java/pl/szczodrzynski/edziennik/data/api/edziennik/mobidziennik/MobidziennikFeatures.kt +++ b/app/src/main/java/pl/szczodrzynski/edziennik/data/api/edziennik/mobidziennik/MobidziennikFeatures.kt @@ -58,10 +58,12 @@ val MobidziennikFeatures = listOf( ENDPOINT_MOBIDZIENNIK_API_MAIN to LOGIN_METHOD_MOBIDZIENNIK_WEB, ENDPOINT_MOBIDZIENNIK_WEB_NOTICES to LOGIN_METHOD_MOBIDZIENNIK_WEB ), listOf(LOGIN_METHOD_MOBIDZIENNIK_WEB, LOGIN_METHOD_MOBIDZIENNIK_WEB)), - // attendance TODO implement website attendance scraping - /*Feature(LOGIN_TYPE_MOBIDZIENNIK, FEATURE_ATTENDANCE, listOf( + /** + * Attendance - only web scraping. + */ + Feature(LOGIN_TYPE_MOBIDZIENNIK, FEATURE_ATTENDANCE, listOf( ENDPOINT_MOBIDZIENNIK_WEB_ATTENDANCE to LOGIN_METHOD_MOBIDZIENNIK_WEB - ), listOf(LOGIN_METHOD_MOBIDZIENNIK_WEB)),*/ + ), listOf(LOGIN_METHOD_MOBIDZIENNIK_WEB)), diff --git a/app/src/main/java/pl/szczodrzynski/edziennik/data/api/edziennik/mobidziennik/data/MobidziennikData.kt b/app/src/main/java/pl/szczodrzynski/edziennik/data/api/edziennik/mobidziennik/data/MobidziennikData.kt index ff66985d..c12f75c5 100644 --- a/app/src/main/java/pl/szczodrzynski/edziennik/data/api/edziennik/mobidziennik/data/MobidziennikData.kt +++ b/app/src/main/java/pl/szczodrzynski/edziennik/data/api/edziennik/mobidziennik/data/MobidziennikData.kt @@ -71,15 +71,15 @@ class MobidziennikData(val data: DataMobidziennik, val onSuccess: () -> Unit) { ENDPOINT_MOBIDZIENNIK_WEB_ACCOUNT_EMAIL -> { data.startProgress(R.string.edziennik_progress_endpoint_account_details) MobidziennikWebAccountEmail(data, lastSync, onSuccess) - }/* - ENDPOINT_MOBIDZIENNIK_WEB_NOTICES -> { - data.startProgress(R.string.edziennik_progress_endpoint_behaviour) - MobidziennikWebNotices(data, lastSync, onSuccess) } ENDPOINT_MOBIDZIENNIK_WEB_ATTENDANCE -> { data.startProgress(R.string.edziennik_progress_endpoint_attendance) MobidziennikWebAttendance(data, lastSync, onSuccess) - } + }/* + ENDPOINT_MOBIDZIENNIK_WEB_NOTICES -> { + data.startProgress(R.string.edziennik_progress_endpoint_behaviour) + MobidziennikWebNotices(data, lastSync, onSuccess) + }] ENDPOINT_MOBIDZIENNIK_WEB_MANUALS -> { data.startProgress(R.string.edziennik_progress_endpoint_lucky_number) MobidziennikWebManuals(data, lastSync, onSuccess) diff --git a/app/src/main/java/pl/szczodrzynski/edziennik/data/api/edziennik/mobidziennik/data/api/MobidziennikApi.kt b/app/src/main/java/pl/szczodrzynski/edziennik/data/api/edziennik/mobidziennik/data/api/MobidziennikApi.kt index d1305a7b..323aeb14 100644 --- a/app/src/main/java/pl/szczodrzynski/edziennik/data/api/edziennik/mobidziennik/data/api/MobidziennikApi.kt +++ b/app/src/main/java/pl/szczodrzynski/edziennik/data/api/edziennik/mobidziennik/data/api/MobidziennikApi.kt @@ -39,7 +39,7 @@ class MobidziennikApi(override val data: DataMobidziennik, 9 -> MobidziennikApiTeams(data, null, rows) 14 -> MobidziennikApiGradeCategories(data, rows) 15 -> MobidziennikApiLessons(data, rows) - 16 -> MobidziennikApiAttendance(data, rows) + //16 -> MobidziennikApiAttendance(data, rows) // disabled since the new web scrapper is used 17 -> MobidziennikApiNotices(data, rows) 18 -> MobidziennikApiGrades(data, rows) 21 -> MobidziennikApiEvents(data, rows) diff --git a/app/src/main/java/pl/szczodrzynski/edziennik/data/api/edziennik/mobidziennik/data/web/MobidziennikWebAttendance.kt b/app/src/main/java/pl/szczodrzynski/edziennik/data/api/edziennik/mobidziennik/data/web/MobidziennikWebAttendance.kt new file mode 100644 index 00000000..c56c56d9 --- /dev/null +++ b/app/src/main/java/pl/szczodrzynski/edziennik/data/api/edziennik/mobidziennik/data/web/MobidziennikWebAttendance.kt @@ -0,0 +1,156 @@ +/* + * Copyright (c) Kuba Szczodrzyński 2020-2-18. + */ + +package pl.szczodrzynski.edziennik.data.api.edziennik.mobidziennik.data.web + +import pl.szczodrzynski.edziennik.data.api.POST +import pl.szczodrzynski.edziennik.data.api.Regexes +import pl.szczodrzynski.edziennik.data.api.edziennik.mobidziennik.DataMobidziennik +import pl.szczodrzynski.edziennik.data.api.edziennik.mobidziennik.ENDPOINT_MOBIDZIENNIK_WEB_ATTENDANCE +import pl.szczodrzynski.edziennik.data.api.edziennik.mobidziennik.data.MobidziennikWeb +import pl.szczodrzynski.edziennik.data.api.models.DataRemoveModel +import pl.szczodrzynski.edziennik.data.db.entity.Attendance +import pl.szczodrzynski.edziennik.data.db.entity.Attendance.* +import pl.szczodrzynski.edziennik.data.db.entity.Metadata +import pl.szczodrzynski.edziennik.data.db.entity.SYNC_ALWAYS +import pl.szczodrzynski.edziennik.fixName +import pl.szczodrzynski.edziennik.get +import pl.szczodrzynski.edziennik.singleOrNull +import pl.szczodrzynski.edziennik.utils.Utils.d +import pl.szczodrzynski.edziennik.utils.models.Date +import pl.szczodrzynski.edziennik.utils.models.Time +import pl.szczodrzynski.edziennik.utils.models.Week + +class MobidziennikWebAttendance(override val data: DataMobidziennik, + override val lastSync: Long?, + val onSuccess: (endpointId: Int) -> Unit +) : MobidziennikWeb(data, lastSync) { + companion object { + private const val TAG = "MobidziennikWebAttendance" + } + + init { data.profile?.let { profile -> + val lastSync = lastSync?.let { Date.fromMillis(it) } ?: profile.dateSemester1Start + var weekStart = Week.getWeekStart() + val syncWeeks = mutableListOf(weekStart) + while (weekStart >= lastSync && weekStart > profile.dateSemester1Start) { + weekStart = weekStart.clone().stepForward(0, 0, -7) + syncWeeks += weekStart + } + + //syncWeeks.clear() + //syncWeeks += Date.fromY_m_d("2019-12-19") + + syncWeeks.minBy { it.value }?.let { + data.toRemove.add(DataRemoveModel.Attendance.from(it)) + } + + start(syncWeeks) + + } ?: onSuccess(ENDPOINT_MOBIDZIENNIK_WEB_ATTENDANCE) } + + private fun start(syncWeeks: MutableList) { + if (syncWeeks.isEmpty()) { + data.setSyncNext(ENDPOINT_MOBIDZIENNIK_WEB_ATTENDANCE, SYNC_ALWAYS) + onSuccess(ENDPOINT_MOBIDZIENNIK_WEB_ATTENDANCE) + return + } + sync(syncWeeks.removeAt(0).stringY_m_d) { + start(syncWeeks) + } + } + + private fun sync(weekStart: String, onSuccess: () -> Unit) { + val requestTime = System.currentTimeMillis() + webGet(TAG, "/dziennik/frekwencja", method = POST, parameters = listOf( + "uczen" to data.studentId, + "data_poniedzialek" to weekStart + )) { text -> + MobidziennikLuckyNumberExtractor(data, text) + + val start = System.currentTimeMillis() + + Regexes.MOBIDZIENNIK_ATTENDANCE_TABLE.findAll(text).forEach { tableResult -> + val table = tableResult[1] + val lessonDates = mutableListOf() + val entries = mutableListOf() + Regexes.MOBIDZIENNIK_ATTENDANCE_LESSON_COUNT.findAll(table).forEach { + val date = Date.fromY_m_d(it[1]) + for (i in 0 until (it[2].toIntOrNull() ?: 0)) { + lessonDates += date + } + } + Regexes.MOBIDZIENNIK_ATTENDANCE_ENTRIES.findAll(table).mapTo(entries) { it[1] } + + val dateIterator = lessonDates.iterator() + val entriesIterator = entries.iterator() + Regexes.MOBIDZIENNIK_ATTENDANCE_RANGE.findAll(table).let { ranges -> + val count = ranges.count() + // verify the lesson count is the same as dates & entries + if (count != lessonDates.count() || count != entries.count()) + return@forEach + ranges.forEach { range -> + val lessonDate = dateIterator.next() + val entry = entriesIterator.next() + if (entry.isBlank()) + return@forEach + val startTime = Time.fromH_m(range[1]) + val entryIterator = entry.iterator() + range[2].split(" / ").mapNotNull { Regexes.MOBIDZIENNIK_ATTENDANCE_LESSON.find(it) }.forEachIndexed { index, lesson -> + val topic = lesson[2] + if (topic.startsWith("Lekcja odwołana: ") || !entryIterator.hasNext()) + return@forEachIndexed + val subjectName = lesson[1] + //val team = lesson[3] + val teacherName = lesson[4].fixName() + + val teacherId = data.teacherList.singleOrNull { it.fullNameLastFirst == teacherName }?.id ?: -1 + val subjectId = data.subjectList.singleOrNull { it.longName == subjectName }?.id ?: -1 + + val type = when (entryIterator.nextChar()) { + '.' -> TYPE_PRESENT + '|' -> TYPE_ABSENT + '+' -> TYPE_ABSENT_EXCUSED + 's' -> TYPE_BELATED + 'z' -> TYPE_RELEASED + else -> TYPE_PRESENT + } + val semester = data.profile?.dateToSemester(lessonDate) ?: 1 + + val id = lessonDate.combineWith(startTime) / 6L * 10L + (lesson[0].hashCode() and 0xFFFF) + index + + val attendanceObject = Attendance( + data.profileId, + id, + teacherId, + subjectId, + semester, + topic, + lessonDate, + startTime, + type) + + data.attendanceList.add(attendanceObject) + if (type != TYPE_PRESENT) { + data.metadataList.add( + Metadata( + data.profileId, + Metadata.TYPE_ATTENDANCE, + id, + data.profile?.empty ?: false, + data.profile?.empty ?: false, + System.currentTimeMillis() + )) + } + } + } + } + } + + d(TAG, "Done in ${System.currentTimeMillis()-start} ms (request ${start-requestTime} ms)") + + onSuccess() + } + } +} diff --git a/app/src/main/java/pl/szczodrzynski/edziennik/data/api/models/Data.kt b/app/src/main/java/pl/szczodrzynski/edziennik/data/api/models/Data.kt index 3acb62ec..b5086539 100644 --- a/app/src/main/java/pl/szczodrzynski/edziennik/data/api/models/Data.kt +++ b/app/src/main/java/pl/szczodrzynski/edziennik/data/api/models/Data.kt @@ -262,6 +262,7 @@ abstract class Data(val app: App, val profile: Profile?, val loginStore: LoginSt is DataRemoveModel.Timetable -> model.commit(profileId, db.timetableDao()) is DataRemoveModel.Grades -> model.commit(profileId, db.gradeDao()) is DataRemoveModel.Events -> model.commit(profileId, db.eventDao()) + is DataRemoveModel.Attendance -> model.commit(profileId, db.attendanceDao()) } } diff --git a/app/src/main/java/pl/szczodrzynski/edziennik/data/api/models/DataRemoveModel.kt b/app/src/main/java/pl/szczodrzynski/edziennik/data/api/models/DataRemoveModel.kt index 0329cdee..ccccc00e 100644 --- a/app/src/main/java/pl/szczodrzynski/edziennik/data/api/models/DataRemoveModel.kt +++ b/app/src/main/java/pl/szczodrzynski/edziennik/data/api/models/DataRemoveModel.kt @@ -4,6 +4,7 @@ package pl.szczodrzynski.edziennik.data.api.models +import pl.szczodrzynski.edziennik.data.db.dao.AttendanceDao import pl.szczodrzynski.edziennik.data.db.dao.EventDao import pl.szczodrzynski.edziennik.data.db.dao.GradeDao import pl.szczodrzynski.edziennik.data.db.dao.TimetableDao @@ -60,4 +61,16 @@ open class DataRemoveModel { exceptTypes?.let { dao.removeFutureExceptTypes(profileId, Date.getToday(), it) } } } + + data class Attendance(private val dateFrom: Date?) : DataRemoveModel() { + companion object { + fun from(dateFrom: Date) = Attendance(dateFrom) + } + + fun commit(profileId: Int, dao: AttendanceDao) { + if (dateFrom != null) { + dao.clearAfterDate(profileId, dateFrom) + } + } + } } diff --git a/app/src/main/java/pl/szczodrzynski/edziennik/data/db/AppDb.kt b/app/src/main/java/pl/szczodrzynski/edziennik/data/db/AppDb.kt index f875a117..8d521e83 100644 --- a/app/src/main/java/pl/szczodrzynski/edziennik/data/db/AppDb.kt +++ b/app/src/main/java/pl/szczodrzynski/edziennik/data/db/AppDb.kt @@ -42,7 +42,7 @@ import pl.szczodrzynski.edziennik.data.db.migration.* ConfigEntry::class, LibrusLesson::class, Metadata::class -], version = 76) +], version = 77) @TypeConverters( ConverterTime::class, ConverterDate::class, @@ -159,7 +159,8 @@ abstract class AppDb : RoomDatabase() { Migration73(), Migration74(), Migration75(), - Migration76() + Migration76(), + Migration77() ).allowMainThreadQueries().build() } } diff --git a/app/src/main/java/pl/szczodrzynski/edziennik/data/db/migration/Migration77.kt b/app/src/main/java/pl/szczodrzynski/edziennik/data/db/migration/Migration77.kt new file mode 100644 index 00000000..f0b083a0 --- /dev/null +++ b/app/src/main/java/pl/szczodrzynski/edziennik/data/db/migration/Migration77.kt @@ -0,0 +1,19 @@ +/* + * Copyright (c) Kuba Szczodrzyński 2020-2-19. + */ + +package pl.szczodrzynski.edziennik.data.db.migration + +import androidx.room.migration.Migration +import androidx.sqlite.db.SupportSQLiteDatabase + +class Migration77 : Migration(76, 77) { + override fun migrate(database: SupportSQLiteDatabase) { + // mobidziennik web attendance implementation: + // delete all attendance from mobidziennik profiles + // (ID conflict/duplicated items - no ID in HTML of the website) + database.execSQL("DELETE FROM attendances WHERE profileId IN (SELECT profileId FROM profiles WHERE loginStoreType = 1 AND archived = 0);") + // mark the web attendance endpoint to force sync + database.execSQL("DELETE FROM endpointTimers WHERE endpointId = 2050;") + } +} diff --git a/app/src/main/java/pl/szczodrzynski/edziennik/ui/modules/attendance/AttendanceFragment.java b/app/src/main/java/pl/szczodrzynski/edziennik/ui/modules/attendance/AttendanceFragment.java index a149bcc1..4af8e2b7 100644 --- a/app/src/main/java/pl/szczodrzynski/edziennik/ui/modules/attendance/AttendanceFragment.java +++ b/app/src/main/java/pl/szczodrzynski/edziennik/ui/modules/attendance/AttendanceFragment.java @@ -45,7 +45,6 @@ import static pl.szczodrzynski.edziennik.data.db.entity.Attendance.TYPE_BELATED; import static pl.szczodrzynski.edziennik.data.db.entity.Attendance.TYPE_BELATED_EXCUSED; import static pl.szczodrzynski.edziennik.data.db.entity.Attendance.TYPE_PRESENT; import static pl.szczodrzynski.edziennik.data.db.entity.Attendance.TYPE_RELEASED; -import static pl.szczodrzynski.edziennik.data.db.entity.LoginStore.LOGIN_TYPE_MOBIDZIENNIK; import static pl.szczodrzynski.edziennik.data.db.entity.LoginStore.LOGIN_TYPE_VULCAN; import static pl.szczodrzynski.edziennik.data.db.entity.Metadata.TYPE_ATTENDANCE; @@ -147,38 +146,33 @@ public class AttendanceFragment extends Fragment { } }*/ - if (app.getProfile().getLoginStoreType() == LOGIN_TYPE_MOBIDZIENNIK) { - b.attendanceSummarySubject.setVisibility(View.GONE); - } - else { - b.attendanceSummarySubject.setOnClickListener((v -> { - AsyncTask.execute(() -> { - List subjectList = App.db.subjectDao().getAllNow(App.Companion.getProfileId()); - PopupMenu popupMenu = new PopupMenu(activity, b.attendanceSummarySubject, Gravity.END); - popupMenu.getMenu().add(0, -1, 0, R.string.subject_filter_disabled); - int index = 0; - DecimalFormat format = new DecimalFormat("0.00"); - for (Subject subject: subjectList) { - int total = subjectTotalCount.get(subject.id, new int[3])[displayMode]; - int absent = subjectAbsentCount.get(subject.id, new int[3])[displayMode]; - if (total == 0) - continue; - int present = total - absent; - float percentage = (float)present / (float)total * 100.0f; - String percentageStr = format.format(percentage); - popupMenu.getMenu().add(0, (int)subject.id, index++, getString(R.string.subject_filter_format, subject.longName, percentageStr)); - } - popupMenu.setOnMenuItemClickListener((item -> { - subjectIdFilter = item.getItemId(); - b.attendanceSummarySubject.setText(item.getTitle().toString().replaceAll("\\s-\\s[0-9]{1,2}\\.[0-9]{1,2}%", "")); - updateList(); - return true; - })); - new Handler(activity.getMainLooper()).post(popupMenu::show); - }); + b.attendanceSummarySubject.setOnClickListener((v -> { + AsyncTask.execute(() -> { + List subjectList = App.db.subjectDao().getAllNow(App.Companion.getProfileId()); + PopupMenu popupMenu = new PopupMenu(activity, b.attendanceSummarySubject, Gravity.END); + popupMenu.getMenu().add(0, -1, 0, R.string.subject_filter_disabled); + int index = 0; + DecimalFormat format = new DecimalFormat("0.00"); + for (Subject subject: subjectList) { + int total = subjectTotalCount.get(subject.id, new int[3])[displayMode]; + int absent = subjectAbsentCount.get(subject.id, new int[3])[displayMode]; + if (total == 0) + continue; + int present = total - absent; + float percentage = (float)present / (float)total * 100.0f; + String percentageStr = format.format(percentage); + popupMenu.getMenu().add(0, (int)subject.id, index++, getString(R.string.subject_filter_format, subject.longName, percentageStr)); + } + popupMenu.setOnMenuItemClickListener((item -> { + subjectIdFilter = item.getItemId(); + b.attendanceSummarySubject.setText(item.getTitle().toString().replaceAll("\\s-\\s[0-9]{1,2}\\.[0-9]{1,2}%", "")); + updateList(); + return true; + })); + new Handler(activity.getMainLooper()).post(popupMenu::show); + }); - })); - } + })); LinearLayoutManager linearLayoutManager = new LinearLayoutManager(getContext());