[API/Mobidziennik] Implement web attendance scraper.

This commit is contained in:
Kuba Szczodrzyński 2020-02-19 18:58:57 +01:00
parent f0447dc455
commit 0bb340e96e
10 changed files with 246 additions and 43 deletions

View File

@ -67,6 +67,23 @@ object Regexes {
} }
/**
 * Matches a `<table>` element whose opening tag ends with `id="obecnosci_tabela">`.
 *
 * Group 1 is everything between that opening tag and the first following `</table>`.
 * `DOT_MATCHES_ALL` lets the content span multiple lines.
 */
val MOBIDZIENNIK_ATTENDANCE_TABLE: Regex by lazy {
    Regex("""<table .+?id="obecnosci_tabela">(.+?)</table>""", DOT_MATCHES_ALL)
}
/**
 * Matches a `rel="…" colspan="…"` attribute pair.
 *
 * Group 1 is exactly ten characters of digits and dashes (the attendance scraper parses it
 * as a `Y-m-d` date); group 2 is the numeric `colspan` value (the scraper uses it as the
 * number of lesson columns belonging to that date).
 */
val MOBIDZIENNIK_ATTENDANCE_LESSON_COUNT: Regex by lazy {
    Regex("""rel="([0-9-]{10})" colspan="([0-9]+)"""")
}
/**
 * Matches a table cell whose markup contains `font-size:` followed (lazily) by a
 * `class="…">` attribute closing the opening tag.
 *
 * Group 1 is the cell content up to the first `</td>`; it may be empty.
 * `DOT_MATCHES_ALL` lets every wildcard span line breaks.
 */
val MOBIDZIENNIK_ATTENDANCE_ENTRIES: Regex by lazy {
    Regex("""font-size:.+?class=".*?">(.*?)</td>""", DOT_MATCHES_ALL)
}
/**
 * Matches one `<span>… - … …</span></a>` header.
 *
 * Group 1 is the run of digits and colons before ` - ` (the attendance scraper parses it
 * as the `H:m` start time). The text between ` - ` and the next space is skipped.
 * Group 2 is the remainder up to `</span></a>` (the scraper splits it on `" / "` into
 * individual lesson descriptions).
 */
val MOBIDZIENNIK_ATTENDANCE_RANGE: Regex by lazy {
    Regex("""<span>([0-9:]+) - .+? (.+?)</span></a>""", DOT_MATCHES_ALL)
}
/**
 * Matches a single lesson description of the form
 * `<strong>A - B</strong> … <small> … (C, …D)`.
 *
 * As consumed by the attendance scraper: group 1 (`A`) is the subject name, group 2 (`B`)
 * is the topic (may be empty), group 4 (`D`) is the teacher name. Group 3 (`C`) is
 * presumably the team/class name - it is currently not read by the scraper.
 *
 * NOTE(review): the `.+?` placed between `", "` and group 4 always consumes at least one
 * character, so group 4 starts at the second character after the comma-space. Presumably
 * the page emits extra whitespace there; confirm it never swallows the first letter of
 * the teacher name.
 */
val MOBIDZIENNIK_ATTENDANCE_LESSON: Regex by lazy {
    Regex("""<strong>(.+?) - (.*?)</strong>.+?<small>.+?\((.+?), .+?(.+?)\)""", DOT_MATCHES_ALL)
}
val IDZIENNIK_LOGIN_HIDDEN_FIELDS by lazy { val IDZIENNIK_LOGIN_HIDDEN_FIELDS by lazy {
"""<input type="hidden".+?name="([A-z0-9_]+)?".+?value="([A-z0-9_+-/=]+)?".+?>""".toRegex(DOT_MATCHES_ALL) """<input type="hidden".+?name="([A-z0-9_]+)?".+?value="([A-z0-9_+-/=]+)?".+?>""".toRegex(DOT_MATCHES_ALL)

View File

@ -58,10 +58,12 @@ val MobidziennikFeatures = listOf(
ENDPOINT_MOBIDZIENNIK_API_MAIN to LOGIN_METHOD_MOBIDZIENNIK_WEB, ENDPOINT_MOBIDZIENNIK_API_MAIN to LOGIN_METHOD_MOBIDZIENNIK_WEB,
ENDPOINT_MOBIDZIENNIK_WEB_NOTICES to LOGIN_METHOD_MOBIDZIENNIK_WEB ENDPOINT_MOBIDZIENNIK_WEB_NOTICES to LOGIN_METHOD_MOBIDZIENNIK_WEB
), listOf(LOGIN_METHOD_MOBIDZIENNIK_WEB, LOGIN_METHOD_MOBIDZIENNIK_WEB)), ), listOf(LOGIN_METHOD_MOBIDZIENNIK_WEB, LOGIN_METHOD_MOBIDZIENNIK_WEB)),
// attendance TODO implement website attendance scraping /**
/*Feature(LOGIN_TYPE_MOBIDZIENNIK, FEATURE_ATTENDANCE, listOf( * Attendance - only web scraping.
*/
Feature(LOGIN_TYPE_MOBIDZIENNIK, FEATURE_ATTENDANCE, listOf(
ENDPOINT_MOBIDZIENNIK_WEB_ATTENDANCE to LOGIN_METHOD_MOBIDZIENNIK_WEB ENDPOINT_MOBIDZIENNIK_WEB_ATTENDANCE to LOGIN_METHOD_MOBIDZIENNIK_WEB
), listOf(LOGIN_METHOD_MOBIDZIENNIK_WEB)),*/ ), listOf(LOGIN_METHOD_MOBIDZIENNIK_WEB)),

View File

@ -71,15 +71,15 @@ class MobidziennikData(val data: DataMobidziennik, val onSuccess: () -> Unit) {
ENDPOINT_MOBIDZIENNIK_WEB_ACCOUNT_EMAIL -> { ENDPOINT_MOBIDZIENNIK_WEB_ACCOUNT_EMAIL -> {
data.startProgress(R.string.edziennik_progress_endpoint_account_details) data.startProgress(R.string.edziennik_progress_endpoint_account_details)
MobidziennikWebAccountEmail(data, lastSync, onSuccess) MobidziennikWebAccountEmail(data, lastSync, onSuccess)
}/*
ENDPOINT_MOBIDZIENNIK_WEB_NOTICES -> {
data.startProgress(R.string.edziennik_progress_endpoint_behaviour)
MobidziennikWebNotices(data, lastSync, onSuccess)
} }
ENDPOINT_MOBIDZIENNIK_WEB_ATTENDANCE -> { ENDPOINT_MOBIDZIENNIK_WEB_ATTENDANCE -> {
data.startProgress(R.string.edziennik_progress_endpoint_attendance) data.startProgress(R.string.edziennik_progress_endpoint_attendance)
MobidziennikWebAttendance(data, lastSync, onSuccess) MobidziennikWebAttendance(data, lastSync, onSuccess)
} }/*
ENDPOINT_MOBIDZIENNIK_WEB_NOTICES -> {
data.startProgress(R.string.edziennik_progress_endpoint_behaviour)
MobidziennikWebNotices(data, lastSync, onSuccess)
}]
ENDPOINT_MOBIDZIENNIK_WEB_MANUALS -> { ENDPOINT_MOBIDZIENNIK_WEB_MANUALS -> {
data.startProgress(R.string.edziennik_progress_endpoint_lucky_number) data.startProgress(R.string.edziennik_progress_endpoint_lucky_number)
MobidziennikWebManuals(data, lastSync, onSuccess) MobidziennikWebManuals(data, lastSync, onSuccess)

View File

@ -39,7 +39,7 @@ class MobidziennikApi(override val data: DataMobidziennik,
9 -> MobidziennikApiTeams(data, null, rows) 9 -> MobidziennikApiTeams(data, null, rows)
14 -> MobidziennikApiGradeCategories(data, rows) 14 -> MobidziennikApiGradeCategories(data, rows)
15 -> MobidziennikApiLessons(data, rows) 15 -> MobidziennikApiLessons(data, rows)
16 -> MobidziennikApiAttendance(data, rows) //16 -> MobidziennikApiAttendance(data, rows) // disabled since the new web scrapper is used
17 -> MobidziennikApiNotices(data, rows) 17 -> MobidziennikApiNotices(data, rows)
18 -> MobidziennikApiGrades(data, rows) 18 -> MobidziennikApiGrades(data, rows)
21 -> MobidziennikApiEvents(data, rows) 21 -> MobidziennikApiEvents(data, rows)

View File

@ -0,0 +1,156 @@
/*
* Copyright (c) Kuba Szczodrzyński 2020-2-18.
*/
package pl.szczodrzynski.edziennik.data.api.edziennik.mobidziennik.data.web
import pl.szczodrzynski.edziennik.data.api.POST
import pl.szczodrzynski.edziennik.data.api.Regexes
import pl.szczodrzynski.edziennik.data.api.edziennik.mobidziennik.DataMobidziennik
import pl.szczodrzynski.edziennik.data.api.edziennik.mobidziennik.ENDPOINT_MOBIDZIENNIK_WEB_ATTENDANCE
import pl.szczodrzynski.edziennik.data.api.edziennik.mobidziennik.data.MobidziennikWeb
import pl.szczodrzynski.edziennik.data.api.models.DataRemoveModel
import pl.szczodrzynski.edziennik.data.db.entity.Attendance
import pl.szczodrzynski.edziennik.data.db.entity.Attendance.*
import pl.szczodrzynski.edziennik.data.db.entity.Metadata
import pl.szczodrzynski.edziennik.data.db.entity.SYNC_ALWAYS
import pl.szczodrzynski.edziennik.fixName
import pl.szczodrzynski.edziennik.get
import pl.szczodrzynski.edziennik.singleOrNull
import pl.szczodrzynski.edziennik.utils.Utils.d
import pl.szczodrzynski.edziennik.utils.models.Date
import pl.szczodrzynski.edziennik.utils.models.Time
import pl.szczodrzynski.edziennik.utils.models.Week
/**
 * Scrapes the Mobidziennik attendance page (`/dziennik/frekwencja`) one week at a time
 * and appends the parsed entries to `data.attendanceList` (plus a `Metadata` row for
 * every entry that is not a plain presence).
 *
 * The work is started from the initializer: the weeks to download are collected, a
 * [DataRemoveModel.Attendance] request is queued for the earliest of them, and the weeks
 * are then fetched sequentially. [onSuccess] is invoked with
 * [ENDPOINT_MOBIDZIENNIK_WEB_ATTENDANCE] once all weeks are done, or immediately when the
 * profile is missing.
 *
 * @param data     the Mobidziennik sync state that receives the parsed objects
 * @param lastSync timestamp (millis) of the previous sync of this endpoint, or `null`;
 *                 when `null`, the start of semester 1 is used as the lower bound
 * @param onSuccess callback receiving the finished endpoint ID
 */
class MobidziennikWebAttendance(override val data: DataMobidziennik,
                                override val lastSync: Long?,
                                val onSuccess: (endpointId: Int) -> Unit
) : MobidziennikWeb(data, lastSync) {
    companion object {
        private const val TAG = "MobidziennikWebAttendance"
    }

    init {
        val profile = data.profile
        if (profile == null) {
            // nothing can be synced without a profile - report the endpoint as finished
            onSuccess(ENDPOINT_MOBIDZIENNIK_WEB_ATTENDANCE)
        } else {
            // lower bound of the sync window; named differently to avoid shadowing [lastSync]
            val syncFrom = lastSync?.let { Date.fromMillis(it) } ?: profile.dateSemester1Start

            // collect week starts, beginning with the current week and stepping back 7 days
            // at a time while still inside the sync window and after the semester 1 start
            var weekStart = Week.getWeekStart()
            val syncWeeks = mutableListOf<Date>(weekStart)
            while (weekStart >= syncFrom && weekStart > profile.dateSemester1Start) {
                weekStart = weekStart.clone().stepForward(0, 0, -7)
                syncWeeks += weekStart
            }

            //syncWeeks.clear()
            //syncWeeks += Date.fromY_m_d("2019-12-19")

            // queue removal of stored attendance starting at the earliest week being re-downloaded
            syncWeeks.minBy { it.value }?.let {
                data.toRemove.add(DataRemoveModel.Attendance.from(it))
            }

            start(syncWeeks)
        }
    }

    /**
     * Downloads the first week remaining in [syncWeeks] and re-invokes itself from the
     * request callback until the list is empty; then schedules the next sync and
     * reports success.
     */
    private fun start(syncWeeks: MutableList<Date>) {
        if (syncWeeks.isEmpty()) {
            data.setSyncNext(ENDPOINT_MOBIDZIENNIK_WEB_ATTENDANCE, SYNC_ALWAYS)
            onSuccess(ENDPOINT_MOBIDZIENNIK_WEB_ATTENDANCE)
            return
        }
        sync(syncWeeks.removeAt(0).stringY_m_d) {
            start(syncWeeks)
        }
    }

    /**
     * Requests the attendance page for the week beginning at [weekStart] (`Y-m-d`),
     * parses every attendance table found in the response and then calls [onSuccess].
     */
    private fun sync(weekStart: String, onSuccess: () -> Unit) {
        val requestTime = System.currentTimeMillis()
        webGet(TAG, "/dziennik/frekwencja", method = POST, parameters = listOf(
                "uczen" to data.studentId,
                "data_poniedzialek" to weekStart
        )) { text ->
            MobidziennikLuckyNumberExtractor(data, text)

            val start = System.currentTimeMillis()

            Regexes.MOBIDZIENNIK_ATTENDANCE_TABLE.findAll(text).forEach { tableResult ->
                parseTable(tableResult[1])
            }

            d(TAG, "Done in ${System.currentTimeMillis()-start} ms (request ${start-requestTime} ms)")

            onSuccess()
        }
    }

    /**
     * Parses one attendance table. The table is skipped entirely when the number of
     * time ranges differs from the number of dates or of entry cells, because the three
     * lists are matched positionally.
     */
    private fun parseTable(table: String) {
        // one date per lesson column: each header date is repeated `colspan` times
        val lessonDates = mutableListOf<Date>()
        Regexes.MOBIDZIENNIK_ATTENDANCE_LESSON_COUNT.findAll(table).forEach {
            val date = Date.fromY_m_d(it[1])
            repeat(it[2].toIntOrNull() ?: 0) {
                lessonDates += date
            }
        }

        val entries = Regexes.MOBIDZIENNIK_ATTENDANCE_ENTRIES.findAll(table).map { it[1] }.toList()

        // materialize once - counting and then iterating the lazy match sequence
        // would run the regex over the whole table twice
        val ranges = Regexes.MOBIDZIENNIK_ATTENDANCE_RANGE.findAll(table).toList()

        // verify the lesson count is the same as dates & entries
        if (ranges.size != lessonDates.size || ranges.size != entries.size)
            return

        ranges.forEachIndexed { position, range ->
            val entry = entries[position]
            if (entry.isNotBlank())
                parseRange(range, lessonDates[position], entry)
        }
    }

    /**
     * Parses a single time slot. [range] group 1 is the start time and group 2 holds one
     * or more lesson descriptions separated by `" / "`; [entry] holds the attendance
     * marks, consumed one character per non-cancelled lesson.
     */
    private fun parseRange(range: MatchResult, lessonDate: Date, entry: String) {
        val startTime = Time.fromH_m(range[1])
        val entryIterator = entry.iterator()

        range[2].split(" / ")
                .mapNotNull { Regexes.MOBIDZIENNIK_ATTENDANCE_LESSON.find(it) }
                .forEachIndexed { index, lesson ->
                    val topic = lesson[2]
                    // cancelled lessons have no mark; also stop consuming when marks run out
                    if (topic.startsWith("Lekcja odwołana: ") || !entryIterator.hasNext())
                        return@forEachIndexed

                    val subjectName = lesson[1]
                    //val team = lesson[3]
                    val teacherName = lesson[4].fixName()

                    val teacherId = data.teacherList.singleOrNull { it.fullNameLastFirst == teacherName }?.id ?: -1
                    val subjectId = data.subjectList.singleOrNull { it.longName == subjectName }?.id ?: -1

                    // any unrecognized mark is treated as a presence
                    val type = when (entryIterator.nextChar()) {
                        '.' -> TYPE_PRESENT
                        '|' -> TYPE_ABSENT
                        '+' -> TYPE_ABSENT_EXCUSED
                        's' -> TYPE_BELATED
                        'z' -> TYPE_RELEASED
                        else -> TYPE_PRESENT
                    }
                    val semester = data.profile?.dateToSemester(lessonDate) ?: 1

                    // the page exposes no entry ID, so one is derived from the date/time,
                    // the hash of the matched lesson text and the lesson's index in the slot
                    val id = lessonDate.combineWith(startTime) / 6L * 10L + (lesson[0].hashCode() and 0xFFFF) + index

                    val attendanceObject = Attendance(
                            data.profileId,
                            id,
                            teacherId,
                            subjectId,
                            semester,
                            topic,
                            lessonDate,
                            startTime,
                            type)

                    data.attendanceList.add(attendanceObject)
                    if (type != TYPE_PRESENT) {
                        data.metadataList.add(
                                Metadata(
                                        data.profileId,
                                        Metadata.TYPE_ATTENDANCE,
                                        id,
                                        data.profile?.empty ?: false,
                                        data.profile?.empty ?: false,
                                        System.currentTimeMillis()
                                ))
                    }
                }
    }
}

View File

@ -262,6 +262,7 @@ abstract class Data(val app: App, val profile: Profile?, val loginStore: LoginSt
is DataRemoveModel.Timetable -> model.commit(profileId, db.timetableDao()) is DataRemoveModel.Timetable -> model.commit(profileId, db.timetableDao())
is DataRemoveModel.Grades -> model.commit(profileId, db.gradeDao()) is DataRemoveModel.Grades -> model.commit(profileId, db.gradeDao())
is DataRemoveModel.Events -> model.commit(profileId, db.eventDao()) is DataRemoveModel.Events -> model.commit(profileId, db.eventDao())
is DataRemoveModel.Attendance -> model.commit(profileId, db.attendanceDao())
} }
} }

View File

@ -4,6 +4,7 @@
package pl.szczodrzynski.edziennik.data.api.models package pl.szczodrzynski.edziennik.data.api.models
import pl.szczodrzynski.edziennik.data.db.dao.AttendanceDao
import pl.szczodrzynski.edziennik.data.db.dao.EventDao import pl.szczodrzynski.edziennik.data.db.dao.EventDao
import pl.szczodrzynski.edziennik.data.db.dao.GradeDao import pl.szczodrzynski.edziennik.data.db.dao.GradeDao
import pl.szczodrzynski.edziennik.data.db.dao.TimetableDao import pl.szczodrzynski.edziennik.data.db.dao.TimetableDao
@ -60,4 +61,16 @@ open class DataRemoveModel {
exceptTypes?.let { dao.removeFutureExceptTypes(profileId, Date.getToday(), it) } exceptTypes?.let { dao.removeFutureExceptTypes(profileId, Date.getToday(), it) }
} }
} }
/**
 * Removal request for stored attendance entries.
 *
 * @property dateFrom date passed to [AttendanceDao.clearAfterDate]; when `null`,
 *                    [commit] does nothing
 */
data class Attendance(private val dateFrom: Date?) : DataRemoveModel() {
    /**
     * Executes the removal for [profileId] using [dao], unless no date was given.
     */
    fun commit(profileId: Int, dao: AttendanceDao) {
        val since = dateFrom ?: return
        dao.clearAfterDate(profileId, since)
    }

    companion object {
        /** Creates a removal request starting at [dateFrom]. */
        fun from(dateFrom: Date) = Attendance(dateFrom)
    }
}
} }

View File

@ -42,7 +42,7 @@ import pl.szczodrzynski.edziennik.data.db.migration.*
ConfigEntry::class, ConfigEntry::class,
LibrusLesson::class, LibrusLesson::class,
Metadata::class Metadata::class
], version = 76) ], version = 77)
@TypeConverters( @TypeConverters(
ConverterTime::class, ConverterTime::class,
ConverterDate::class, ConverterDate::class,
@ -159,7 +159,8 @@ abstract class AppDb : RoomDatabase() {
Migration73(), Migration73(),
Migration74(), Migration74(),
Migration75(), Migration75(),
Migration76() Migration76(),
Migration77()
).allowMainThreadQueries().build() ).allowMainThreadQueries().build()
} }
} }

View File

@ -0,0 +1,19 @@
/*
* Copyright (c) Kuba Szczodrzyński 2020-2-19.
*/
package pl.szczodrzynski.edziennik.data.db.migration
import androidx.room.migration.Migration
import androidx.sqlite.db.SupportSQLiteDatabase
/**
 * Database migration from schema version 76 to 77, accompanying the Mobidziennik web
 * attendance implementation. It only removes rows; no table structure is changed.
 */
class Migration77 : Migration(76, 77) {
    override fun migrate(database: SupportSQLiteDatabase) {
        with(database) {
            execSQL(DELETE_MOBIDZIENNIK_ATTENDANCE)
            execSQL(DELETE_WEB_ATTENDANCE_TIMER)
        }
    }

    private companion object {
        // Delete all attendance from Mobidziennik profiles: the website's HTML has no
        // entry IDs, so previously stored items would conflict with / duplicate the
        // newly scraped ones.
        const val DELETE_MOBIDZIENNIK_ATTENDANCE =
                "DELETE FROM attendances WHERE profileId IN (SELECT profileId FROM profiles WHERE loginStoreType = 1 AND archived = 0);"

        // Drop the timer of the web attendance endpoint so that it is synced again.
        const val DELETE_WEB_ATTENDANCE_TIMER =
                "DELETE FROM endpointTimers WHERE endpointId = 2050;"
    }
}

View File

@ -45,7 +45,6 @@ import static pl.szczodrzynski.edziennik.data.db.entity.Attendance.TYPE_BELATED;
import static pl.szczodrzynski.edziennik.data.db.entity.Attendance.TYPE_BELATED_EXCUSED; import static pl.szczodrzynski.edziennik.data.db.entity.Attendance.TYPE_BELATED_EXCUSED;
import static pl.szczodrzynski.edziennik.data.db.entity.Attendance.TYPE_PRESENT; import static pl.szczodrzynski.edziennik.data.db.entity.Attendance.TYPE_PRESENT;
import static pl.szczodrzynski.edziennik.data.db.entity.Attendance.TYPE_RELEASED; import static pl.szczodrzynski.edziennik.data.db.entity.Attendance.TYPE_RELEASED;
import static pl.szczodrzynski.edziennik.data.db.entity.LoginStore.LOGIN_TYPE_MOBIDZIENNIK;
import static pl.szczodrzynski.edziennik.data.db.entity.LoginStore.LOGIN_TYPE_VULCAN; import static pl.szczodrzynski.edziennik.data.db.entity.LoginStore.LOGIN_TYPE_VULCAN;
import static pl.szczodrzynski.edziennik.data.db.entity.Metadata.TYPE_ATTENDANCE; import static pl.szczodrzynski.edziennik.data.db.entity.Metadata.TYPE_ATTENDANCE;
@ -147,38 +146,33 @@ public class AttendanceFragment extends Fragment {
} }
}*/ }*/
if (app.getProfile().getLoginStoreType() == LOGIN_TYPE_MOBIDZIENNIK) { b.attendanceSummarySubject.setOnClickListener((v -> {
b.attendanceSummarySubject.setVisibility(View.GONE); AsyncTask.execute(() -> {
} List<Subject> subjectList = App.db.subjectDao().getAllNow(App.Companion.getProfileId());
else { PopupMenu popupMenu = new PopupMenu(activity, b.attendanceSummarySubject, Gravity.END);
b.attendanceSummarySubject.setOnClickListener((v -> { popupMenu.getMenu().add(0, -1, 0, R.string.subject_filter_disabled);
AsyncTask.execute(() -> { int index = 0;
List<Subject> subjectList = App.db.subjectDao().getAllNow(App.Companion.getProfileId()); DecimalFormat format = new DecimalFormat("0.00");
PopupMenu popupMenu = new PopupMenu(activity, b.attendanceSummarySubject, Gravity.END); for (Subject subject: subjectList) {
popupMenu.getMenu().add(0, -1, 0, R.string.subject_filter_disabled); int total = subjectTotalCount.get(subject.id, new int[3])[displayMode];
int index = 0; int absent = subjectAbsentCount.get(subject.id, new int[3])[displayMode];
DecimalFormat format = new DecimalFormat("0.00"); if (total == 0)
for (Subject subject: subjectList) { continue;
int total = subjectTotalCount.get(subject.id, new int[3])[displayMode]; int present = total - absent;
int absent = subjectAbsentCount.get(subject.id, new int[3])[displayMode]; float percentage = (float)present / (float)total * 100.0f;
if (total == 0) String percentageStr = format.format(percentage);
continue; popupMenu.getMenu().add(0, (int)subject.id, index++, getString(R.string.subject_filter_format, subject.longName, percentageStr));
int present = total - absent; }
float percentage = (float)present / (float)total * 100.0f; popupMenu.setOnMenuItemClickListener((item -> {
String percentageStr = format.format(percentage); subjectIdFilter = item.getItemId();
popupMenu.getMenu().add(0, (int)subject.id, index++, getString(R.string.subject_filter_format, subject.longName, percentageStr)); b.attendanceSummarySubject.setText(item.getTitle().toString().replaceAll("\\s-\\s[0-9]{1,2}\\.[0-9]{1,2}%", ""));
} updateList();
popupMenu.setOnMenuItemClickListener((item -> { return true;
subjectIdFilter = item.getItemId(); }));
b.attendanceSummarySubject.setText(item.getTitle().toString().replaceAll("\\s-\\s[0-9]{1,2}\\.[0-9]{1,2}%", "")); new Handler(activity.getMainLooper()).post(popupMenu::show);
updateList(); });
return true;
}));
new Handler(activity.getMainLooper()).post(popupMenu::show);
});
})); }));
}
LinearLayoutManager linearLayoutManager = new LinearLayoutManager(getContext()); LinearLayoutManager linearLayoutManager = new LinearLayoutManager(getContext());